What is PHP, and why?

PHP is a popular general-purpose scripting language that is especially suited to Web development. Fast, flexible, and pragmatic, PHP powers everything from your blog to the most popular websites in the world.

Rust 🚀 C 🚀 PHP

libgutenberg_post_parser.a

gutenberg_post_parser.h

Bootstrap with a skeleton

php-src

# Generate the extension skeleton from inside the PHP source tree.
$ cd php-src/ext/
$ ./ext_skel.php \
      --ext gutenberg_post_parser \
      --author 'Ivan Enderlin' \
      --dir /path/to/extension \
      --onlyunix
# Inspect the files generated for the extension.
$ cd /path/to/extension
$ ls gutenberg_post_parser
tests/
.gitignore
CREDITS
config.m4
gutenberg_post_parser.c
php_gutenberg_post_parser.h

ext_skel.php

Rebuild the configuration of the PHP source (run ./buildconf at the root of the php-src directory),

Reconfigure the build system to enable the extension, like ./configure --enable-gutenberg_post_parser,

Build with make,

Done.

php-src

phpize

phpize

php

php-cgi

phpdbg

php-config

php

$ cd /path/to/extension/gutenberg_post_parser $ # Get the bin directory for PHP utilities. $ PHP_PREFIX_BIN=$(php-config --prefix)/bin $ # Clean (except if it is the first run). $ $PHP_PREFIX_BIN/phpize --clean $ # “phpize” the extension. $ $PHP_PREFIX_BIN/phpize $ # Configure the extension for a particular PHP version. $ ./configure --with-php-config=$PHP_PREFIX_BIN/php-config $ # Compile. $ make install

config.m4

dnl Register the configure switch for the extension. PHP_ARG_ENABLE creates
dnl an `--enable-<name>` option, so the help text advertises that spelling.
PHP_ARG_ENABLE(gutenberg_post_parser, whether to enable gutenberg_post_parser support,
[  --enable-gutenberg_post_parser   Include gutenberg_post_parser support], no)

if test "$PHP_GUTENBERG_POST_PARSER" != "no"; then
  dnl Variable that collects the libraries to link into the shared extension.
  PHP_SUBST(GUTENBERG_POST_PARSER_SHARED_LIBADD)

  dnl Link against libgutenberg_post_parser, looked up in the extension directory (`.`).
  PHP_ADD_LIBRARY_WITH_PATH(gutenberg_post_parser, ., GUTENBERG_POST_PARSER_SHARED_LIBADD)

  dnl Declare the extension and its single C source file.
  PHP_NEW_EXTENSION(gutenberg_post_parser, gutenberg_post_parser.c, $ext_shared)
fi

Register the --enable-gutenberg_post_parser option in the build system (PHP_ARG_ENABLE creates an --enable-… switch), and

Declare the static library to compile with, and the source of the extension itself.

libgutenberg_post_parser.a

gutenberg_post_parser.h

$ ls gutenberg_post_parser tests/ # from ext_skel .gitignore # from ext_skel CREDITS # from ext_skel config.m4 # from ext_skel (edited) gutenberg_post_parser.c # from ext_skel (will be edited) gutenberg_post_parser.h # from Rust libgutenberg_post_parser.a # from Rust php_gutenberg_post_parser.h # from ext_skel

gutenberg_post_parser.c

The module, aka the extension

gutenberg_post_parser.c

#include "php.h" #include "ext/standard/info.h" #include "php_gutenberg_post_parser.h" #include "gutenberg_post_parser.h"

gutenberg_post_parser.h

cbindgen

/// A node of the parsed post: either a block or a phrase.
pub enum Node<'a> {
    /// A block, with its name, optional attributes and child nodes.
    Block {
        // Pair of inputs naming the block — presumably (namespace, name); confirm against the parser.
        name: (Input<'a>, Input<'a>),
        // Attributes of the block, if any.
        attributes: Option<Input<'a>>,
        // Nested nodes; may be empty.
        children: Vec<Node<'a>>
    },
    /// A phrase, carrying its content.
    Phrase(Input<'a>)
}

// PHP-side shape of the API exposed by the extension (declarations only).

// Mirrors the Rust `Node::Block` variant.
class Gutenberg_Parser_Block {
    public string $namespace;
    public string $name;
    // Declared with a NULL default by the extension, so it may be NULL.
    public string $attributes;
    // Declared with a NULL default by the extension, so it may be NULL.
    public array $children;
}

// Mirrors the Rust `Node::Phrase` variant.
class Gutenberg_Parser_Phrase {
    public string $content;
}

// Parses a Gutenberg post into an array of the two classes above.
// The C implementation returns `false` when parsing fails.
function gutenberg_post_parse(string $gutenberg_post): array;

gutenberg_post_parse

Gutenberg_Parser_Block

Gutenberg_Parser_Phrase

Declare the classes

/* Class entry for Gutenberg_Parser_Block; assigned during module initialisation. */
zend_class_entry *gutenberg_parser_block_class_entry;

/* Class entry for Gutenberg_Parser_Phrase. */
zend_class_entry *gutenberg_parser_phrase_class_entry;

/* Object handlers shared by every parser node object; filled in during module initialisation. */
zend_object_handlers gutenberg_parser_node_class_entry_handlers;

/* Custom object wrapping the standard Zend object; it carries no extra state. */
typedef struct _gutenberg_parser_node {
    zend_object zobj;
} gutenberg_parser_node;

/*
 * `create_object` handler for the parser node classes.
 *
 * Allocates a zeroed `gutenberg_parser_node` with room for the properties
 * table of `class_entry`, initialises the embedded Zend object and its
 * default properties, attaches the shared node handlers, and returns the
 * embedded `zend_object`.
 */
static zend_object *create_parser_node_object(zend_class_entry *class_entry)
{
    gutenberg_parser_node *node = ecalloc(
        1,
        sizeof(gutenberg_parser_node) + zend_object_properties_size(class_entry)
    );
    zend_object *object = &node->zobj;

    zend_object_std_init(object, class_entry);
    object_properties_init(object, class_entry);
    object->handlers = &gutenberg_parser_node_class_entry_handlers;

    return object;
}

/*
 * `dtor_obj` handler: delegates to the standard Zend object destruction.
 */
static void destroy_parser_node_object(zend_object *object)
{
    zend_objects_destroy_object(object);
}

/*
 * `free_obj` handler: delegates to the standard Zend object teardown.
 */
static void free_parser_node_object(zend_object *object)
{
    zend_object_std_dtor(object);
}

PHP_MINIT_FUNCTION (gutenberg_post_parser) { zend_class_entry class_entry; // Declare Gutenberg_Parser_Block. INIT_CLASS_ENTRY (class_entry, " Gutenberg_Parser_Block " , NULL ); gutenberg_parser_block_class_entry = zend_register_internal_class (&class_entry TSRMLS_CC); // Declare the create handler. gutenberg_parser_block_class_entry-> create_object = create_parser_node_object; // The class is final. gutenberg_parser_block_class_entry-> ce_flags |= ZEND_ACC_FINAL; // Declare the `namespace` public attribute, // with an empty string for the default value. zend_declare_property_string (gutenberg_parser_block_class_entry, " namespace " , sizeof( " namespace " ) - 1 , " " , ZEND_ACC_PUBLIC); // Declare the `name` public attribute, // with an empty string for the default value. zend_declare_property_string (gutenberg_parser_block_class_entry, " name " , sizeof( " name " ) - 1 , " " , ZEND_ACC_PUBLIC); // Declare the `attributes` public attribute, // with `NULL` for the default value. zend_declare_property_null (gutenberg_parser_block_class_entry, " attributes " , sizeof( " attributes " ) - 1 , ZEND_ACC_PUBLIC); // Declare the `children` public attribute, // with `NULL` for the default value. zend_declare_property_null (gutenberg_parser_block_class_entry, " children " , sizeof( " children " ) - 1 , ZEND_ACC_PUBLIC); // Declare the Gutenberg_Parser_Block. … skip … // Declare Gutenberg parser node object handlers. memcpy (&gutenberg_parser_node_class_entry_handlers, zend_get_std_object_handlers (), sizeof(gutenberg_parser_node_class_entry_handlers)); gutenberg_parser_node_class_entry_handlers. offset = XtOffsetOf (gutenberg_parser_node, zobj); gutenberg_parser_node_class_entry_handlers. dtor_obj = destroy_parser_node_object; gutenberg_parser_node_class_entry_handlers. free_obj = free_parser_node_object; return SUCCESS; }

PHP_RINIT_FUNCTION

PHP_MINFO_FUNCTION

ext_skel.php

The gutenberg_post_parse function

gutenberg_post_parse

false

Gutenberg_Parser_Block

Gutenberg_Parser_Phrase

PHP_FUNCTION (gutenberg_post_parse) { char *input; size_t input_len; // Read the input as a string. if ( zend_parse_parameters ( ZEND_NUM_ARGS () TSRMLS_CC, " s " , &input, &input_len) == FAILURE) { return; }

"s"

input

input_len

input

// Parse the input. Result parser_result = parse(input); // If parsing failed, then return false. if (parser_result.tag == Err) { RETURN_FALSE; } // Else map the Rust AST into a PHP array. const Vector_Node nodes = parse_result.ok._0;

Result

parse

RETURN_FALSE

false

Vector_Node

// Note: return_value is a “magic” variable that holds the value to be returned. // // Allocate an array. array_init_size(return_value, nodes.length); // Map the Rust AST. into_php_objects(return_value, &nodes); }

into_php_objects

The into_php_objects function

Block

Gutenberg_Parser_Block

Phrase

Gutenberg_Parser_Phrase

/*
 * Walks the `nodes` vector and maps each node to its PHP counterpart
 * inside `php_array`.
 *
 * php_array: PHP array that receives the mapped objects.
 * nodes:     vector of parsed nodes; an empty vector is a no-op.
 */
void into_php_objects(zval *php_array, const Vector_Node *nodes)
{
    const uintptr_t count = nodes->length;
    uintptr_t index;

    // An empty vector never enters the loop, so nothing is done for it.
    for (index = 0; index < count; index++) {
        const Node node = nodes->buffer[index];

        switch (node.tag) {
        case Block:
            // Map Block into Gutenberg_Parser_Block.
            break;

        case Phrase:
            // Map Phrase into Gutenberg_Parser_Phrase.
            break;

        default:
            // Any other tag is ignored.
            break;
        }
    }
}

1. Allocate PHP strings for the block namespace, and for the block name,
2. Allocate an object,
3. Set the block namespace and the block name to their respective object properties,
4. Allocate a PHP string for the block attributes if any,
5. Set the block attributes to its respective object property,
6. If any children, initialise a new array, and call into_php_objects with the child nodes and the new array,
7. Set the children to its respective object property,
8. Finally, add the block object inside the array to be returned.

// Body of the Block variant of the current node.
const Block_Body block = node.block;
zval php_block, php_block_namespace, php_block_name;

// 1. Prepare the PHP strings.
// Each string is built from a (pointer, length) pair.
ZVAL_STRINGL(&php_block_namespace, block.namespace.pointer, block.namespace.length);
ZVAL_STRINGL(&php_block_name, block.name.pointer, block.name.length);

Slice_c_char

// 2. Create the Gutenberg_Parser_Block object. object_init_ex(&php_block, gutenberg_parser_block_class_entry);

gutenberg_parser_block_class_entry

// 3. Set the namespace and the name. add_property_zval (&php_block, " namespace " , &php_block_namespace); add_property_zval (&php_block, " name " , &php_block_name); zval_ptr_dtor (&php_block_namespace); zval_ptr_dtor (&php_block_name);

zval_ptr_dtor

// 4. Deal with block attributes if some. if (block.attributes.tag == Some) { Slice_c_char attributes = block.attributes.some._0; zval php_block_attributes; ZVAL_STRINGL (&php_block_attributes, attributes. pointer , attributes. length ); // 5. Set the attributes. add_property_zval (&php_block, " attributes " , &php_block_attributes); zval_ptr_dtor (&php_block_attributes); }

namespace

name

// 6. Handle children. const Vector_Node *children = (const Vector_Node*) (block.children); if (children->length > 0) { zval php_children_array; array_init_size (&php_children_array, children-> length ); // Recursion. into_php_objects (&php_children_array, children); // 7. Set the children. add_property_zval (&php_block, " children " , &php_children_array); Z_DELREF (php_children_array); } free((void*) children);

// 8. Insert the object in the collection. add_next_index_zval(php_array, &php_block);

PHP extension 🚀 PHP userland

phpize

gutenberg_post_parser.so

$ php-config --extension-dir

/usr/local/Cellar/php/7.2.11/pecl/20170718

$ php -d extension=gutenberg_post_parser -m | \ grep gutenberg_post_parser

php.ini

php --ini

extension=gutenberg_post_parser

$ php --re gutenberg_post_parser Extension [ <persistent> extension #64 gutenberg_post_parser version 0.1.0 ] { - Functions { Function [ <internal:gutenberg_post_parser> function gutenberg_post_parse ] { - Parameters [1] { Parameter #0 [ <required> $gutenberg_post_as_string ] } } } - Classes [2] { Class [ <internal:gutenberg_post_parser> final class Gutenberg_Parser_Block ] { - Constants [0] { } - Static properties [0] { } - Static methods [0] { } - Properties [4] { Property [ <default> public $namespace ] Property [ <default> public $name ] Property [ <default> public $attributes ] Property [ <default> public $children ] } - Methods [0] { } } Class [ <internal:gutenberg_post_parser> final class Gutenberg_Parser_Phrase ] { - Constants [0] { } - Static properties [0] { } - Static methods [0] { } - Properties [1] { Property [ <default> public $content ] } - Methods [0] { } } } }

<?php var_dump( gutenberg_post_parse( '<!-- wp:foo /-->bar<!-- wp:baz -->qux<!-- /wp:baz --> ' ) ); /** * Will output: * array(3) { * [0]=> * object(Gutenberg_Parser_Block)#1 (4) { * ["namespace"]=> * string(4) "core" * ["name"]=> * string(3) "foo" * ["attributes"]=> * NULL * ["children"]=> * NULL * } * [1]=> * object(Gutenberg_Parser_Phrase)#2 (1) { * ["content"]=> * string(3) "bar" * } * [2]=> * object(Gutenberg_Parser_Block)#3 (4) { * ["namespace"]=> * string(4) "core" * ["name"]=> * string(3) "baz" * ["attributes"]=> * NULL * ["children"]=> * array(1) { * [0]=> * object(Gutenberg_Parser_Phrase)#4 (1) { * ["content"]=> * string(3) "qux" * } * } * } * } */

Conclusion

A string written in PHP,

Allocated by the Zend Engine from the Gutenberg extension,

Passed to Rust through FFI (static library + header),

Back to Zend Engine in the Gutenberg extension,

To generate PHP objects,

That are read by PHP.

| file | PEG PHP parser (ms) | Rust parser as a PHP extension (ms) | speedup |
|---|---|---|---|
| demo-post.html | 30.409 | 0.0012 | × 25341 |
| shortcode-shortcomings.html | 76.39 | 0.096 | × 796 |
| redesigning-chrome-desktop.html | 225.824 | 0.399 | × 566 |
| web-at-maximum-fps.html | 173.495 | 0.275 | × 631 |
| early-adopting-the-future.html | 280.433 | 0.298 | × 941 |
| pygmalian-raw-html.html | 377.392 | 0.052 | × 7258 |
| moby-dick-parsed.html | 5,437.630 | 5.037 | × 1080 |