405 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			405 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
| <?php
 | |
| /**
 | |
|  * Block Serialization Parser
 | |
|  *
 | |
|  * @package WordPress
 | |
|  */
 | |
| 
 | |
/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block objects
 *
 * The parser walks the document one block-comment delimiter ("token") at a
 * time. Openers are pushed onto a stack of frames, closers pop a frame and
 * attach the finished block either to its parent frame or to the output
 * list, and any text found between tokens is kept as HTML.
 *
 * @since 5.0.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * Byte offset just past the last token that was consumed.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks
	 *
	 * Each finished top-level block is cast to an array before being appended.
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * All parser state is reset on entry, so the same instance can be
	 * used for several documents in sequence.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		$this->document = $document;
		$this->offset   = 0;
		$this->output   = array();
		$this->stack    = array();

		// Each call consumes one token; proceed() returns false once parsing is done.
		while ( $this->proceed() ) {
			continue;
		}

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True when more tokens may follow, false when parsing has finished.
	 */
	public function proceed() {
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $this->next_token();

		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 *
				 * We assume that every still-open block is missing its
				 * closer and collapse the whole stack piecewise. With a
				 * single open block the loop runs exactly once, which is
				 * the "implicit closer" case.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * easy case is if we stumbled upon a void block
				 * in the top-level of the document
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent
				 *
				 * Note that the trailing HTML is recorded here even when it
				 * is an empty string.
				 */
				$stack_top                        = array_pop( $this->stack );
				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The returned list is always five items long:
	 * token type ('no-more-tokens', 'void-block', 'block-closer' or
	 * 'block-opener'), block name, attributes, byte offset where the token
	 * starts, and byte length of the token. Everything but the type is null
	 * when no token was found; attributes are null for closers.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * `0` means we have no more tokens. `false` means the PCRE engine
		 * failed - probably catastrophic backtracking or out-of-memory.
		 * Both are reported to the caller in the same way.
		 */
		if ( 1 !== $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		// With PREG_OFFSET_CAPTURE an optional group that did not take part in the match has offset -1.
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 *
		 * Note that json_decode() yields null when the attribute text is not valid JSON.
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: array();

		/*
		 * A closer that is also void, or that carries attributes, isn't an
		 * allowed state. This is an error, but we can ignore the extras
		 * since they don't hurt anything.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * The block has a null name, no attributes and no inner blocks; the
	 * given HTML is used both as its inner HTML and as its only content item.
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $inner_html HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $inner_html ) {
		return new WP_Block_Parser_Block( null, array(), array(), $inner_html, array( $inner_html ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * The text is taken starting at the current parse offset. Nothing is
	 * pushed when there is no text left.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output;
	 *                         any falsy value means "everything up to the end of the document".
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the output list.
	 *
	 * More precisely, the block is appended to the inner blocks of the frame
	 * on top of the stack, together with any HTML that sits between the
	 * parent's previous offset and the start of this block. A null entry is
	 * added to the parent's inner content to mark where the inner block sits.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string instead of using empty(): PHP
		 * treats the string "0" as empty, which would silently drop inner
		 * HTML consisting of a single zero. The is_string() check skips the
		 * `false` that substr() can return on older PHP versions.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any HTML between the frame's previous offset and the end offset is
	 * appended to the block first. If the frame recorded leading HTML in
	 * front of its opener, that HTML is pushed as a freeform block ahead of
	 * the block itself.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML;
	 *                             when omitted the rest of the document is used.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string instead of using empty(): PHP
		 * treats the string "0" as empty, which would silently drop inner
		 * HTML consisting of a single zero. The is_string() check skips the
		 * `false` that substr() can return on older PHP versions.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}
 | |
| 
 | |
| /**
 | |
|  * WP_Block_Parser_Block class.
 | |
|  *
 | |
|  * Required for backward compatibility in WordPress Core.
 | |
|  */
 | |
| require_once __DIR__ . '/class-wp-block-parser-block.php';
 | |
| 
 | |
| /**
 | |
|  * WP_Block_Parser_Frame class.
 | |
|  *
 | |
|  * Required for backward compatibility in WordPress Core.
 | |
|  */
 | |
| require_once __DIR__ . '/class-wp-block-parser-frame.php';
 |