|
1 <?php |
|
2 /** |
|
3 * Block Serialization Parser |
|
4 * |
|
5 * @package WordPress |
|
6 */ |
|
7 |
|
/**
 * Class WP_Block_Parser_Block
 *
 * Plain value object describing one parsed block.
 *
 * The order in which the properties are declared is significant: the parser
 * casts instances of this class to arrays, and the keys of the resulting
 * array follow the declaration order below.
 *
 * @since 3.8.0
 */
class WP_Block_Parser_Block {
	/**
	 * Block name, including its namespace.
	 *
	 * @example "core/paragraph"
	 *
	 * @since 3.8.0
	 * @var string
	 */
	public $blockName;

	/**
	 * Attributes read from the block comment delimiter, when present.
	 *
	 * @example null
	 * @example array( 'columns' => 3 )
	 *
	 * @since 3.8.0
	 * @var array|null
	 */
	public $attrs;

	/**
	 * Blocks nested inside this one (each of this same class).
	 *
	 * @since 3.8.0
	 * @var WP_Block_Parser_Block[]
	 */
	public $innerBlocks;

	/**
	 * HTML found between the block comment delimiters once the
	 * nested blocks have been taken out.
	 *
	 * @example "...Just <!-- wp:test /--> testing..." -> "Just testing..."
	 *
	 * @since 3.8.0
	 * @var string
	 */
	public $innerHTML;

	/**
	 * Ordered HTML fragments, with a null entry marking each position
	 * at which an inner block was found.
	 *
	 * @example array(
	 *   'innerHTML'    => 'BeforeInnerAfter',
	 *   'innerBlocks'  => array( block, block ),
	 *   'innerContent' => array( 'Before', null, 'Inner', null, 'After' ),
	 * )
	 *
	 * @since 4.2.0
	 * @var array
	 */
	public $innerContent;

	/**
	 * Constructor.
	 *
	 * Copies every argument onto the property it describes; no validation
	 * or normalisation is performed.
	 *
	 * @since 3.8.0
	 *
	 * @param string $name         Name of block.
	 * @param array  $attrs        Optional set of attributes from block comment delimiters.
	 * @param array  $innerBlocks  List of inner blocks (of this same class).
	 * @param string $innerHTML    Resultant HTML from inside block comment delimiters after removing inner blocks.
	 * @param array  $innerContent List of string fragments and null markers where inner blocks were found.
	 */
	public function __construct( $name, $attrs, $innerBlocks, $innerHTML, $innerContent ) {
		// Content first, then nesting, then identity; the order is irrelevant
		// because every property is already declared on the class.
		$this->innerContent = $innerContent;
		$this->innerHTML    = $innerHTML;
		$this->innerBlocks  = $innerBlocks;
		$this->attrs        = $attrs;
		$this->blockName    = $name;
	}
}
|
91 |
|
/**
 * Class WP_Block_Parser_Frame
 *
 * One entry of the parser's stack: a block that is still being parsed,
 * together with the byte offsets needed to finish it later.
 *
 * @internal
 * @since 3.8.0
 */
class WP_Block_Parser_Frame {
	/**
	 * The block, complete or still under construction.
	 *
	 * @since 3.8.0
	 * @var WP_Block_Parser_Block
	 */
	public $block;

	/**
	 * Byte offset in the document at which the parse token begins.
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $token_start;

	/**
	 * Length, in bytes, of the whole parse token.
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $token_length;

	/**
	 * Byte offset in the document just past the end of the parse token
	 * (used while rebuilding the stack into the parse output).
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $prev_offset;

	/**
	 * Byte offset in the document at which the HTML preceding the token
	 * begins, or null when none was recorded.
	 *
	 * @since 3.8.0
	 * @var int|null
	 */
	public $leading_html_start;

	/**
	 * Constructor.
	 *
	 * Stores the arguments on the frame. When no previous offset is given
	 * it is derived as the first byte after the token.
	 *
	 * @since 3.8.0
	 *
	 * @param WP_Block_Parser_Block $block              Full or partial block.
	 * @param int                   $token_start        Byte offset into document for start of parse token.
	 * @param int                   $token_length       Byte length of entire parse token string.
	 * @param int|null              $prev_offset        Byte offset into document for after parse token ends.
	 *                                                  Defaults to $token_start + $token_length.
	 * @param int|null              $leading_html_start Byte offset into document where leading HTML before token starts.
	 */
	public function __construct( $block, $token_start, $token_length, $prev_offset = null, $leading_html_start = null ) {
		// Only a missing (null) offset triggers the fallback; an explicit 0 is kept.
		if ( null === $prev_offset ) {
			$prev_offset = $token_start + $token_length;
		}

		$this->block              = $block;
		$this->token_start        = $token_start;
		$this->token_length       = $token_length;
		$this->prev_offset        = $prev_offset;
		$this->leading_html_start = $leading_html_start;
	}
}
|
163 |
|
/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block structures.
 *
 * @since 3.8.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 3.8.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * @since 3.8.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks, each stored as an array (blocks are cast
	 * with `(array)` before being appended).
	 *
	 * @since 3.8.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 3.8.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Empty associative array, here due to PHP quirks
	 *
	 * @since 4.4.0
	 * @var array empty associative array
	 */
	public $empty_attrs;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * All parser state (document, offset, output, stack) is reset on
	 * every call, so one instance can parse several documents in turn.
	 *
	 * @since 3.8.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[] Parsed blocks, each one a WP_Block_Parser_Block cast to an array.
	 */
	public function parse( $document ) {
		$this->document    = $document;
		$this->offset      = 0;
		$this->output      = array();
		$this->stack       = array();
		$this->empty_attrs = json_decode( '{}', true );

		do {
			// twiddle our thumbs: all the work happens inside proceed().
		} while ( $this->proceed() );

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 3.8.0
	 * @return bool True when parsing should continue, false when it is finished.
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 *
				 * We assume the closers are missing and collapse the whole
				 * stack piecewise. With a single open block this runs once,
				 * which is the "implicit closer" case.
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * easy case is if we stumbled upon a void block
				 * in the top-level of the document
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent
				 */
				$stack_top                        = array_pop( $this->stack );
				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The result is always a five-element list:
	 * `array( $token_type, $block_name, $attrs, $start_offset, $token_length )`
	 * where `$token_type` is one of 'no-more-tokens', 'void-block',
	 * 'block-opener' or 'block-closer'. For 'no-more-tokens' the other
	 * four entries are null; for 'block-closer' `$attrs` is null.
	 *
	 * @internal
	 * @since 3.8.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		/*
		 * false: we probably have catastrophic backtracking or out-of-memory in the PCRE.
		 * 0:     we have no more tokens.
		 * Both are reported to the caller the same way.
		 */
		if ( false === $has_match || 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		// With PREG_OFFSET_CAPTURE a group that did not participate has offset -1.
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 *
		 * Note that json_decode() yields null when the captured text is not valid JSON.
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: $this->empty_attrs;

		/*
		 * A closer that is also void or carries attributes isn't allowed.
		 * This is an error, but we can ignore it since it doesn't hurt anything:
		 * a void marker wins over the closer marker below, and a closer's
		 * attributes are simply not returned.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * The block has a null name, the shared empty attribute array, no inner
	 * blocks, and the given HTML as both its innerHTML and its only
	 * innerContent entry.
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $innerHTML HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $innerHTML ) {
		return new WP_Block_Parser_Block( null, $this->empty_attrs, array(), $innerHTML, array( $innerHTML ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * The text starts at the current parse offset. Nothing is pushed when
	 * there is no text left to output.
	 *
	 * @internal
	 * @since 3.8.0
	 * @param int|null $length how many bytes of document text to output;
	 *                         when omitted (or 0) the rest of the document is used.
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the innerBlocks of the block on top of the stack.
	 *
	 * Any document text between the parent's previous offset and the start
	 * of the inner block is appended to the parent's innerHTML/innerContent,
	 * followed by a null marker in innerContent for the inner block itself.
	 *
	 * @internal
	 * @since 3.8.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing form earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string explicitly: empty() also treats
		 * the string "0" as empty, which would silently drop such content.
		 * is_string() covers substr() returning false on older PHP versions.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Remaining document text up to $end_offset (or to the end of the
	 * document) is appended to the block first. If leading HTML was recorded
	 * for the block's frame, it is emitted as a freeform block before the
	 * block itself.
	 *
	 * @internal
	 * @since 3.8.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML.
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string explicitly: empty() also treats
		 * the string "0" as empty, which would silently drop such content.
		 * is_string() covers substr() returning false on older PHP versions.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}