405 lines
11 KiB
PHP
405 lines
11 KiB
PHP
|
<?php
|
||
|
/**
|
||
|
* Block Serialization Parser
|
||
|
*
|
||
|
* @package WordPress
|
||
|
*/
|
||
|
|
||
|
/**
|
||
|
* Class WP_Block_Parser
|
||
|
*
|
||
|
* Parses a document and constructs a list of parsed block objects
|
||
|
*
|
||
|
* @since 5.0.0
|
||
|
* @since 4.0.0 returns arrays not objects, all attributes are arrays
|
||
|
*/
|
||
|
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document (byte offset).
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks.
	 *
	 * Every block is cast to an array before being appended here.
	 *
	 * @since 5.0.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		$this->document = $document;
		$this->offset   = 0;
		$this->output   = array();
		$this->stack    = array();

		// Each call consumes one token; proceed() returns false once parsing is done.
		while ( $this->proceed() ) {
			continue;
		}

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True when more tokens may follow, false when parsing is finished.
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 *
				 * We assume implicit closers: one for the easy un-nested
				 * case, several for the nested case, collapsing the whole
				 * stack piecewise.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * easy case is if we stumbled upon a void block
				 * in the top-level of the document
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				$this->stack[] = new WP_Block_Parser_Frame(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length,
					$start_offset + $token_length,
					$leading_html_start
				);
				$this->offset  = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent.
				 *
				 * Note: the trailing HTML is recorded here even when it is an
				 * empty string; this long-standing output shape is preserved.
				 */
				$stack_top                        = array_pop( $this->stack );
				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns a five-element list: token type, block name, attributes,
	 * byte offset where the token starts, and byte length of the token.
	 * The token type is one of 'no-more-tokens', 'void-block',
	 * 'block-opener' or 'block-closer'.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * `false` means the PCRE engine failed (probably catastrophic
		 * backtracking or out-of-memory); `0` means we have no more tokens.
		 * Either way there is nothing further to tokenize.
		 */
		if ( false === $has_match || 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		$length = strlen( $match );

		// Unmatched optional groups are either absent or reported with offset -1.
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: array();

		/*
		 * A closer that is also void, or that carries attributes, isn't
		 * an allowed state. This is an error, but we can ignore those
		 * extras since they don't hurt anything.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $inner_html HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $inner_html ) {
		return new WP_Block_Parser_Block( null, array(), array(), $inner_html, array( $inner_html ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output;
	 *                         a falsy value means "everything from the current offset".
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the inner-block list of the frame on top of the stack.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string rather than using empty():
		 * empty( '0' ) is true, which would silently drop HTML consisting
		 * solely of "0". is_string() guards against substr() returning
		 * false on older PHP versions.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		// A null entry marks the position of the inner block within the content.
		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any freeform HTML that preceded the block's opening token is
	 * emitted first as its own freeform block.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string rather than using empty():
		 * empty( '0' ) is true, which would silently drop HTML consisting
		 * solely of "0". is_string() guards against substr() returning
		 * false on older PHP versions.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}
|
||
|
|
||
|
/**
|
||
|
* WP_Block_Parser_Block class.
|
||
|
*
|
||
|
* Required for backward compatibility in WordPress Core.
|
||
|
*/
|
||
|
require_once __DIR__ . '/class-wp-block-parser-block.php';
|
||
|
|
||
|
/**
|
||
|
* WP_Block_Parser_Frame class.
|
||
|
*
|
||
|
* Required for backward compatibility in WordPress Core.
|
||
|
*/
|
||
|
require_once __DIR__ . '/class-wp-block-parser-frame.php';
|