wp/wp-includes/class-wp-block-parser.php
changeset 9 177826044cd9
child 18 be944660c56a
equal deleted inserted replaced
8:c7c34916027a 9:177826044cd9
       
     1 <?php
       
     2 /**
       
     3  * Block Serialization Parser
       
     4  *
       
     5  * @package WordPress
       
     6  */
       
     7 
       
     8 /**
       
     9  * Class WP_Block_Parser_Block
       
    10  *
       
    11  * Holds the block structure in memory
       
    12  *
       
    13  * @since 3.8.0
       
    14  */
       
    15 class WP_Block_Parser_Block {
       
    16 	/**
       
    17 	 * Name of block
       
    18 	 *
       
    19 	 * @example "core/paragraph"
       
    20 	 *
       
    21 	 * @since 3.8.0
       
    22 	 * @var string
       
    23 	 */
       
    24 	public $blockName;
       
    25 
       
    26 	/**
       
    27 	 * Optional set of attributes from block comment delimiters
       
    28 	 *
       
    29 	 * @example null
       
    30 	 * @example array( 'columns' => 3 )
       
    31 	 *
       
    32 	 * @since 3.8.0
       
    33 	 * @var array|null
       
    34 	 */
       
    35 	public $attrs;
       
    36 
       
    37 	/**
       
    38 	 * List of inner blocks (of this same class)
       
    39 	 *
       
    40 	 * @since 3.8.0
       
    41 	 * @var WP_Block_Parser_Block[]
       
    42 	 */
       
    43 	public $innerBlocks;
       
    44 
       
    45 	/**
       
    46 	 * Resultant HTML from inside block comment delimiters
       
    47 	 * after removing inner blocks
       
    48 	 *
       
    49 	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
       
    50 	 *
       
    51 	 * @since 3.8.0
       
    52 	 * @var string
       
    53 	 */
       
    54 	public $innerHTML;
       
    55 
       
    56 	/**
       
    57 	 * List of string fragments and null markers where inner blocks were found
       
    58 	 *
       
    59 	 * @example array(
       
    60 	 *   'innerHTML'    => 'BeforeInnerAfter',
       
    61 	 *   'innerBlocks'  => array( block, block ),
       
    62 	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
       
    63 	 * )
       
    64 	 *
       
    65 	 * @since 4.2.0
       
    66 	 * @var array
       
    67 	 */
       
    68 	public $innerContent;
       
    69 
       
    70 	/**
       
    71 	 * Constructor.
       
    72 	 *
       
    73 	 * Will populate object properties from the provided arguments.
       
    74 	 *
       
    75 	 * @since 3.8.0
       
    76 	 *
       
    77 	 * @param string $name         Name of block.
       
    78 	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
       
    79 	 * @param array  $innerBlocks  List of inner blocks (of this same class).
       
    80 	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
       
    81 	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
       
    82 	 */
       
    83 	function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
       
    84 		$this->blockName    = $name;
       
    85 		$this->attrs        = $attrs;
       
    86 		$this->innerBlocks  = $innerBlocks;
       
    87 		$this->innerHTML    = $innerHTML;
       
    88 		$this->innerContent = $innerContent;
       
    89 	}
       
    90 }
       
    91 
       
    92 /**
       
    93  * Class WP_Block_Parser_Frame
       
    94  *
       
    95  * Holds partial blocks in memory while parsing
       
    96  *
       
    97  * @internal
       
    98  * @since 3.8.0
       
    99  */
       
   100 class WP_Block_Parser_Frame {
       
   101 	/**
       
   102 	 * Full or partial block
       
   103 	 *
       
   104 	 * @since 3.8.0
       
   105 	 * @var WP_Block_Parser_Block
       
   106 	 */
       
   107 	public $block;
       
   108 
       
   109 	/**
       
   110 	 * Byte offset into document for start of parse token
       
   111 	 *
       
   112 	 * @since 3.8.0
       
   113 	 * @var int
       
   114 	 */
       
   115 	public $token_start;
       
   116 
       
   117 	/**
       
   118 	 * Byte length of entire parse token string
       
   119 	 *
       
   120 	 * @since 3.8.0
       
   121 	 * @var int
       
   122 	 */
       
   123 	public $token_length;
       
   124 
       
   125 	/**
       
   126 	 * Byte offset into document for after parse token ends
       
   127 	 * (used during reconstruction of stack into parse production)
       
   128 	 *
       
   129 	 * @since 3.8.0
       
   130 	 * @var int
       
   131 	 */
       
   132 	public $prev_offset;
       
   133 
       
   134 	/**
       
   135 	 * Byte offset into document where leading HTML before token starts
       
   136 	 *
       
   137 	 * @since 3.8.0
       
   138 	 * @var int
       
   139 	 */
       
   140 	public $leading_html_start;
       
   141 
       
   142 	/**
       
   143 	 * Constructor
       
   144 	 *
       
   145 	 * Will populate object properties from the provided arguments.
       
   146 	 *
       
   147 	 * @since 3.8.0
       
   148 	 *
       
   149 	 * @param WP_Block_Parser_Block $block              Full or partial block.
       
   150 	 * @param int                   $token_start        Byte offset into document for start of parse token.
       
   151 	 * @param int                   $token_length       Byte length of entire parse token string.
       
   152 	 * @param int                   $prev_offset        Byte offset into document for after parse token ends.
       
   153 	 * @param int                   $leading_html_start Byte offset into document where leading HTML before token starts.
       
   154 	 */
       
   155 	function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
       
   156 		$this->block              = $block;
       
   157 		$this->token_start        = $token_start;
       
   158 		$this->token_length       = $token_length;
       
   159 		$this->prev_offset        = isset( $prev_offset ) ? $prev_offset : $token_start + $token_length;
       
   160 		$this->leading_html_start = $leading_html_start;
       
   161 	}
       
   162 }
       
   163 
       
   164 /**
       
   165  * Class WP_Block_Parser
       
   166  *
       
   167  * Parses a document and constructs a list of parsed block objects
       
   168  *
       
   169  * @since 3.8.0
       
   170  * @since 4.0.0 returns arrays not objects, all attributes are arrays
       
   171  */
       
   172 class WP_Block_Parser {
       
   173 	/**
       
   174 	 * Input document being parsed
       
   175 	 *
       
   176 	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
       
   177 	 *
       
   178 	 * @since 3.8.0
       
   179 	 * @var string
       
   180 	 */
       
   181 	public $document;
       
   182 
       
   183 	/**
       
   184 	 * Tracks parsing progress through document
       
   185 	 *
       
   186 	 * @since 3.8.0
       
   187 	 * @var int
       
   188 	 */
       
   189 	public $offset;
       
   190 
       
   191 	/**
       
   192 	 * List of parsed blocks
       
   193 	 *
       
   194 	 * @since 3.8.0
       
   195 	 * @var WP_Block_Parser_Block[]
       
   196 	 */
       
   197 	public $output;
       
   198 
       
   199 	/**
       
   200 	 * Stack of partially-parsed structures in memory during parse
       
   201 	 *
       
   202 	 * @since 3.8.0
       
   203 	 * @var WP_Block_Parser_Frame[]
       
   204 	 */
       
   205 	public $stack;
       
   206 
       
   207 	/**
       
   208 	 * Empty associative array, here due to PHP quirks
       
   209 	 *
       
   210 	 * @since 4.4.0
       
   211 	 * @var array empty associative array
       
   212 	 */
       
   213 	public $empty_attrs;
       
   214 
       
   215 	/**
       
   216 	 * Parses a document and returns a list of block structures
       
   217 	 *
       
   218 	 * When encountering an invalid parse will return a best-effort
       
   219 	 * parse. In contrast to the specification parser this does not
       
   220 	 * return an error on invalid inputs.
       
   221 	 *
       
   222 	 * @since 3.8.0
       
   223 	 *
       
   224 	 * @param string $document Input document being parsed.
       
   225 	 * @return WP_Block_Parser_Block[]
       
   226 	 */
       
   227 	function parse( $document ) {
       
   228 		$this->document    = $document;
       
   229 		$this->offset      = 0;
       
   230 		$this->output      = array();
       
   231 		$this->stack       = array();
       
   232 		$this->empty_attrs = json_decode( '{}', true );
       
   233 
       
   234 		do {
       
   235 			// twiddle our thumbs.
       
   236 		} while ( $this->proceed() );
       
   237 
       
   238 		return $this->output;
       
   239 	}
       
   240 
       
   241 	/**
       
   242 	 * Processes the next token from the input document
       
   243 	 * and returns whether to proceed eating more tokens
       
   244 	 *
       
   245 	 * This is the "next step" function that essentially
       
   246 	 * takes a token as its input and decides what to do
       
   247 	 * with that token before descending deeper into a
       
   248 	 * nested block tree or continuing along the document
       
   249 	 * or breaking out of a level of nesting.
       
   250 	 *
       
   251 	 * @internal
       
   252 	 * @since 3.8.0
       
   253 	 * @return bool
       
   254 	 */
       
   255 	function proceed() {
       
   256 		$next_token = $this->next_token();
       
   257 		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
       
   258 		$stack_depth = count( $this->stack );
       
   259 
       
   260 		// we may have some HTML soup before the next block.
       
   261 		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;
       
   262 
       
   263 		switch ( $token_type ) {
       
   264 			case 'no-more-tokens':
       
   265 				// if not in a block then flush output.
       
   266 				if ( 0 === $stack_depth ) {
       
   267 					$this->add_freeform();
       
   268 					return false;
       
   269 				}
       
   270 
       
   271 				/*
       
   272 				 * Otherwise we have a problem
       
   273 				 * This is an error
       
   274 				 *
       
   275 				 * we have options
       
   276 				 * - treat it all as freeform text
       
   277 				 * - assume an implicit closer (easiest when not nesting)
       
   278 				 */
       
   279 
       
   280 				// for the easy case we'll assume an implicit closer.
       
   281 				if ( 1 === $stack_depth ) {
       
   282 					$this->add_block_from_stack();
       
   283 					return false;
       
   284 				}
       
   285 
       
   286 				/*
       
   287 				 * for the nested case where it's more difficult we'll
       
   288 				 * have to assume that multiple closers are missing
       
   289 				 * and so we'll collapse the whole stack piecewise
       
   290 				 */
       
   291 				while ( 0 < count( $this->stack ) ) {
       
   292 					$this->add_block_from_stack();
       
   293 				}
       
   294 				return false;
       
   295 
       
   296 			case 'void-block':
       
   297 				/*
       
   298 				 * easy case is if we stumbled upon a void block
       
   299 				 * in the top-level of the document
       
   300 				 */
       
   301 				if ( 0 === $stack_depth ) {
       
   302 					if ( isset( $leading_html_start ) ) {
       
   303 						$this->output[] = (array) self::freeform(
       
   304 							substr(
       
   305 								$this->document,
       
   306 								$leading_html_start,
       
   307 								$start_offset - $leading_html_start
       
   308 							)
       
   309 						);
       
   310 					}
       
   311 
       
   312 					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
       
   313 					$this->offset   = $start_offset + $token_length;
       
   314 					return true;
       
   315 				}
       
   316 
       
   317 				// otherwise we found an inner block.
       
   318 				$this->add_inner_block(
       
   319 					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
       
   320 					$start_offset,
       
   321 					$token_length
       
   322 				);
       
   323 				$this->offset = $start_offset + $token_length;
       
   324 				return true;
       
   325 
       
   326 			case 'block-opener':
       
   327 				// track all newly-opened blocks on the stack.
       
   328 				array_push(
       
   329 					$this->stack,
       
   330 					new WP_Block_Parser_Frame(
       
   331 						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
       
   332 						$start_offset,
       
   333 						$token_length,
       
   334 						$start_offset + $token_length,
       
   335 						$leading_html_start
       
   336 					)
       
   337 				);
       
   338 				$this->offset = $start_offset + $token_length;
       
   339 				return true;
       
   340 
       
   341 			case 'block-closer':
       
   342 				/*
       
   343 				 * if we're missing an opener we're in trouble
       
   344 				 * This is an error
       
   345 				 */
       
   346 				if ( 0 === $stack_depth ) {
       
   347 					/*
       
   348 					 * we have options
       
   349 					 * - assume an implicit opener
       
   350 					 * - assume _this_ is the opener
       
   351 					 * - give up and close out the document
       
   352 					 */
       
   353 					$this->add_freeform();
       
   354 					return false;
       
   355 				}
       
   356 
       
   357 				// if we're not nesting then this is easy - close the block.
       
   358 				if ( 1 === $stack_depth ) {
       
   359 					$this->add_block_from_stack( $start_offset );
       
   360 					$this->offset = $start_offset + $token_length;
       
   361 					return true;
       
   362 				}
       
   363 
       
   364 				/*
       
   365 				 * otherwise we're nested and we have to close out the current
       
   366 				 * block and add it as a new innerBlock to the parent
       
   367 				 */
       
   368 				$stack_top                        = array_pop( $this->stack );
       
   369 				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
       
   370 				$stack_top->block->innerHTML     .= $html;
       
   371 				$stack_top->block->innerContent[] = $html;
       
   372 				$stack_top->prev_offset           = $start_offset + $token_length;
       
   373 
       
   374 				$this->add_inner_block(
       
   375 					$stack_top->block,
       
   376 					$stack_top->token_start,
       
   377 					$stack_top->token_length,
       
   378 					$start_offset + $token_length
       
   379 				);
       
   380 				$this->offset = $start_offset + $token_length;
       
   381 				return true;
       
   382 
       
   383 			default:
       
   384 				// This is an error.
       
   385 				$this->add_freeform();
       
   386 				return false;
       
   387 		}
       
   388 	}
       
   389 
       
   390 	/**
       
   391 	 * Scans the document from where we last left off
       
   392 	 * and finds the next valid token to parse if it exists
       
   393 	 *
       
   394 	 * Returns the type of the find: kind of find, block information, attributes
       
   395 	 *
       
   396 	 * @internal
       
   397 	 * @since 3.8.0
       
   398 	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
       
   399 	 * @return array
       
   400 	 */
       
   401 	function next_token() {
       
   402 		$matches = null;
       
   403 
       
   404 		/*
       
   405 		 * aye the magic
       
   406 		 * we're using a single RegExp to tokenize the block comment delimiters
       
   407 		 * we're also using a trick here because the only difference between a
       
   408 		 * block opener and a block closer is the leading `/` before `wp:` (and
       
   409 		 * a closer has no attributes). we can trap them both and process the
       
   410 		 * match back in PHP to see which one it was.
       
   411 		 */
       
   412 		$has_match = preg_match(
       
   413 			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
       
   414 			$this->document,
       
   415 			$matches,
       
   416 			PREG_OFFSET_CAPTURE,
       
   417 			$this->offset
       
   418 		);
       
   419 
       
   420 		// if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
       
   421 		if ( false === $has_match ) {
       
   422 			return array( 'no-more-tokens', null, null, null, null );
       
   423 		}
       
   424 
       
   425 		// we have no more tokens.
       
   426 		if ( 0 === $has_match ) {
       
   427 			return array( 'no-more-tokens', null, null, null, null );
       
   428 		}
       
   429 
       
   430 		list( $match, $started_at ) = $matches[0];
       
   431 
       
   432 		$length    = strlen( $match );
       
   433 		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
       
   434 		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
       
   435 		$namespace = $matches['namespace'];
       
   436 		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
       
   437 		$name      = $namespace . $matches['name'][0];
       
   438 		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];
       
   439 
       
   440 		/*
       
   441 		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
       
   442 		 * are associative arrays. If we use `array()` we get a JSON `[]`
       
   443 		 */
       
   444 		$attrs = $has_attrs
       
   445 			? json_decode( $matches['attrs'][0], /* as-associative */ true )
       
   446 			: $this->empty_attrs;
       
   447 
       
   448 		/*
       
   449 		 * This state isn't allowed
       
   450 		 * This is an error
       
   451 		 */
       
   452 		if ( $is_closer && ( $is_void || $has_attrs ) ) {
       
   453 			// we can ignore them since they don't hurt anything.
       
   454 		}
       
   455 
       
   456 		if ( $is_void ) {
       
   457 			return array( 'void-block', $name, $attrs, $started_at, $length );
       
   458 		}
       
   459 
       
   460 		if ( $is_closer ) {
       
   461 			return array( 'block-closer', $name, null, $started_at, $length );
       
   462 		}
       
   463 
       
   464 		return array( 'block-opener', $name, $attrs, $started_at, $length );
       
   465 	}
       
   466 
       
   467 	/**
       
   468 	 * Returns a new block object for freeform HTML
       
   469 	 *
       
   470 	 * @internal
       
   471 	 * @since 3.9.0
       
   472 	 *
       
   473 	 * @param string $innerHTML HTML content of block.
       
   474 	 * @return WP_Block_Parser_Block freeform block object.
       
   475 	 */
       
   476 	function freeform( $innerHTML ) {
       
   477 		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
       
   478 	}
       
   479 
       
   480 	/**
       
   481 	 * Pushes a length of text from the input document
       
   482 	 * to the output list as a freeform block.
       
   483 	 *
       
   484 	 * @internal
       
   485 	 * @since 3.8.0
       
   486 	 * @param null $length how many bytes of document text to output.
       
   487 	 */
       
   488 	function add_freeform( $length = null ) {
       
   489 		$length = $length ? $length : strlen( $this->document ) - $this->offset;
       
   490 
       
   491 		if ( 0 === $length ) {
       
   492 			return;
       
   493 		}
       
   494 
       
   495 		$this->output[] = (array) self::freeform( substr( $this->document, $this->offset, $length ) );
       
   496 	}
       
   497 
       
   498 	/**
       
   499 	 * Given a block structure from memory pushes
       
   500 	 * a new block to the output list.
       
   501 	 *
       
   502 	 * @internal
       
   503 	 * @since 3.8.0
       
   504 	 * @param WP_Block_Parser_Block $block        The block to add to the output.
       
   505 	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
       
   506 	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
       
   507 	 * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
       
   508 	 */
       
   509 	function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
       
   510 		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
       
   511 		$parent->block->innerBlocks[] = (array) $block;
       
   512 		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );
       
   513 
       
   514 		if ( ! empty( $html ) ) {
       
   515 			$parent->block->innerHTML     .= $html;
       
   516 			$parent->block->innerContent[] = $html;
       
   517 		}
       
   518 
       
   519 		$parent->block->innerContent[] = null;
       
   520 		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
       
   521 	}
       
   522 
       
   523 	/**
       
   524 	 * Pushes the top block from the parsing stack to the output list.
       
   525 	 *
       
   526 	 * @internal
       
   527 	 * @since 3.8.0
       
   528 	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
       
   529 	 */
       
   530 	function add_block_from_stack( $end_offset = null ) {
       
   531 		$stack_top   = array_pop( $this->stack );
       
   532 		$prev_offset = $stack_top->prev_offset;
       
   533 
       
   534 		$html = isset( $end_offset )
       
   535 			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
       
   536 			: substr( $this->document, $prev_offset );
       
   537 
       
   538 		if ( ! empty( $html ) ) {
       
   539 			$stack_top->block->innerHTML     .= $html;
       
   540 			$stack_top->block->innerContent[] = $html;
       
   541 		}
       
   542 
       
   543 		if ( isset( $stack_top->leading_html_start ) ) {
       
   544 			$this->output[] = (array) self::freeform(
       
   545 				substr(
       
   546 					$this->document,
       
   547 					$stack_top->leading_html_start,
       
   548 					$stack_top->token_start - $stack_top->leading_html_start
       
   549 				)
       
   550 			);
       
   551 		}
       
   552 
       
   553 		$this->output[] = (array) $stack_top->block;
       
   554 	}
       
   555 }