author | ymh <ymh.work@gmail.com> |
Mon, 08 Sep 2025 19:44:41 +0200 | |
changeset 23 | 417f20492bf7 |
parent 21 | 48c4eec2b7e6 |
permissions | -rw-r--r-- |
9 | 1 |
<?php |
2 |
/** |
|
3 |
* Block Serialization Parser |
|
4 |
* |
|
5 |
* @package WordPress |
|
6 |
*/ |
|
7 |
||
8 |
/**
 * Class WP_Block_Parser
 *
 * Parses a document and constructs a list of parsed block structures.
 *
 * The parser is a small state machine: next_token() finds the next block
 * comment delimiter, proceed() decides what that token means given the
 * current nesting depth, and the add_*() helpers move finished pieces
 * either into a parent block on the stack or into the output list.
 *
 * @since 5.0.0
 * @since 4.0.0 returns arrays not objects, all attributes are arrays
 */
class WP_Block_Parser {
	/**
	 * Input document being parsed
	 *
	 * @example "Pre-text\n<!-- wp:paragraph -->This is inside a block!<!-- /wp:paragraph -->"
	 *
	 * @since 5.0.0
	 * @var string
	 */
	public $document;

	/**
	 * Tracks parsing progress through document
	 *
	 * Byte offset of the first character that has not been consumed yet.
	 *
	 * @since 5.0.0
	 * @var int
	 */
	public $offset;

	/**
	 * List of parsed blocks
	 *
	 * Every entry is a block object cast to an array before it is stored.
	 *
	 * @since 5.0.0
	 * @var array[]
	 */
	public $output;

	/**
	 * Stack of partially-parsed structures in memory during parse
	 *
	 * @since 5.0.0
	 * @var WP_Block_Parser_Frame[]
	 */
	public $stack;

	/**
	 * Parses a document and returns a list of block structures
	 *
	 * When encountering an invalid parse will return a best-effort
	 * parse. In contrast to the specification parser this does not
	 * return an error on invalid inputs.
	 *
	 * All parser state is reset on entry, so one instance can be
	 * reused for several documents.
	 *
	 * @since 5.0.0
	 *
	 * @param string $document Input document being parsed.
	 * @return array[]
	 */
	public function parse( $document ) {
		$this->document = $document;
		$this->offset   = 0;
		$this->output   = array();
		$this->stack    = array();

		// proceed() does all the work; loop until it reports there is nothing left.
		while ( $this->proceed() ) {
			continue;
		}

		return $this->output;
	}

	/**
	 * Processes the next token from the input document
	 * and returns whether to proceed eating more tokens
	 *
	 * This is the "next step" function that essentially
	 * takes a token as its input and decides what to do
	 * with that token before descending deeper into a
	 * nested block tree or continuing along the document
	 * or breaking out of a level of nesting.
	 *
	 * @internal
	 * @since 5.0.0
	 * @return bool True when the caller should keep consuming tokens, false when parsing is finished.
	 */
	public function proceed() {
		$next_token = $this->next_token();
		list( $token_type, $block_name, $attrs, $start_offset, $token_length ) = $next_token;
		$stack_depth = count( $this->stack );

		// we may have some HTML soup before the next block.
		$leading_html_start = $start_offset > $this->offset ? $this->offset : null;

		switch ( $token_type ) {
			case 'no-more-tokens':
				// if not in a block then flush output.
				if ( 0 === $stack_depth ) {
					$this->add_freeform();
					return false;
				}

				/*
				 * Otherwise we have a problem
				 * This is an error
				 *
				 * we have options
				 * - treat it all as freeform text
				 * - assume an implicit closer (easiest when not nesting)
				 */

				// for the easy case we'll assume an implicit closer.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack();
					return false;
				}

				/*
				 * for the nested case where it's more difficult we'll
				 * have to assume that multiple closers are missing
				 * and so we'll collapse the whole stack piecewise
				 */
				while ( 0 < count( $this->stack ) ) {
					$this->add_block_from_stack();
				}
				return false;

			case 'void-block':
				/*
				 * easy case is if we stumbled upon a void block
				 * in the top-level of the document
				 */
				if ( 0 === $stack_depth ) {
					if ( isset( $leading_html_start ) ) {
						$this->output[] = (array) $this->freeform(
							substr(
								$this->document,
								$leading_html_start,
								$start_offset - $leading_html_start
							)
						);
					}

					$this->output[] = (array) new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() );
					$this->offset   = $start_offset + $token_length;
					return true;
				}

				// otherwise we found an inner block.
				$this->add_inner_block(
					new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
					$start_offset,
					$token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-opener':
				// track all newly-opened blocks on the stack.
				array_push(
					$this->stack,
					new WP_Block_Parser_Frame(
						new WP_Block_Parser_Block( $block_name, $attrs, array(), '', array() ),
						$start_offset,
						$token_length,
						$start_offset + $token_length,
						$leading_html_start
					)
				);
				$this->offset = $start_offset + $token_length;
				return true;

			case 'block-closer':
				/*
				 * if we're missing an opener we're in trouble
				 * This is an error
				 */
				if ( 0 === $stack_depth ) {
					/*
					 * we have options
					 * - assume an implicit opener
					 * - assume _this_ is the opener
					 * - give up and close out the document
					 */
					$this->add_freeform();
					return false;
				}

				// if we're not nesting then this is easy - close the block.
				if ( 1 === $stack_depth ) {
					$this->add_block_from_stack( $start_offset );
					$this->offset = $start_offset + $token_length;
					return true;
				}

				/*
				 * otherwise we're nested and we have to close out the current
				 * block and add it as a new innerBlock to the parent
				 *
				 * Note: the trailing HTML is appended here even when it is an
				 * empty string; that output shape is kept as-is on purpose.
				 */
				$stack_top                        = array_pop( $this->stack );
				$html                             = substr( $this->document, $stack_top->prev_offset, $start_offset - $stack_top->prev_offset );
				$stack_top->block->innerHTML     .= $html;
				$stack_top->block->innerContent[] = $html;
				$stack_top->prev_offset           = $start_offset + $token_length;

				$this->add_inner_block(
					$stack_top->block,
					$stack_top->token_start,
					$stack_top->token_length,
					$start_offset + $token_length
				);
				$this->offset = $start_offset + $token_length;
				return true;

			default:
				// This is an error.
				$this->add_freeform();
				return false;
		}
	}

	/**
	 * Scans the document from where we last left off
	 * and finds the next valid token to parse if it exists
	 *
	 * Returns the type of the find: kind of find, block information, attributes
	 *
	 * The returned array always has five positions:
	 * token type ('no-more-tokens', 'void-block', 'block-opener' or
	 * 'block-closer'), full block name, decoded attributes, byte offset
	 * where the token starts, and byte length of the token.
	 *
	 * @internal
	 * @since 5.0.0
	 * @since 4.6.1 fixed a bug in attribute parsing which caused catastrophic backtracking on invalid block comments
	 * @return array
	 */
	public function next_token() {
		$matches = null;

		/*
		 * aye the magic
		 * we're using a single RegExp to tokenize the block comment delimiters
		 * we're also using a trick here because the only difference between a
		 * block opener and a block closer is the leading `/` before `wp:` (and
		 * a closer has no attributes). we can trap them both and process the
		 * match back in PHP to see which one it was.
		 */
		$has_match = preg_match(
			'/<!--\s+(?P<closer>\/)?wp:(?P<namespace>[a-z][a-z0-9_-]*\/)?(?P<name>[a-z][a-z0-9_-]*)\s+(?P<attrs>{(?:(?:[^}]+|}+(?=})|(?!}\s+\/?-->).)*+)?}\s+)?(?P<void>\/)?-->/s',
			$this->document,
			$matches,
			PREG_OFFSET_CAPTURE,
			$this->offset
		);

		// if we get here we probably have catastrophic backtracking or out-of-memory in the PCRE.
		if ( false === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		// we have no more tokens.
		if ( 0 === $has_match ) {
			return array( 'no-more-tokens', null, null, null, null );
		}

		list( $match, $started_at ) = $matches[0];

		/*
		 * With PREG_OFFSET_CAPTURE an optional group that did not take part
		 * in the match is either absent or reported with offset -1, so both
		 * conditions are checked for every optional group.
		 */
		$length    = strlen( $match );
		$is_closer = isset( $matches['closer'] ) && -1 !== $matches['closer'][1];
		$is_void   = isset( $matches['void'] ) && -1 !== $matches['void'][1];
		$namespace = $matches['namespace'];
		$namespace = ( isset( $namespace ) && -1 !== $namespace[1] ) ? $namespace[0] : 'core/';
		$name      = $namespace . $matches['name'][0];
		$has_attrs = isset( $matches['attrs'] ) && -1 !== $matches['attrs'][1];

		/*
		 * Fun fact! It's not trivial in PHP to create "an empty associative array" since all arrays
		 * are associative arrays. If we use `array()` we get a JSON `[]`
		 */
		$attrs = $has_attrs
			? json_decode( $matches['attrs'][0], /* as-associative */ true )
			: array();

		/*
		 * A closer that is also void or carries attributes isn't an allowed
		 * state and is technically an error, but we can ignore the extras
		 * since they don't hurt anything: the checks below still classify
		 * the token, and a closer never reports attributes.
		 */

		if ( $is_void ) {
			return array( 'void-block', $name, $attrs, $started_at, $length );
		}

		if ( $is_closer ) {
			return array( 'block-closer', $name, null, $started_at, $length );
		}

		return array( 'block-opener', $name, $attrs, $started_at, $length );
	}

	/**
	 * Returns a new block object for freeform HTML
	 *
	 * A freeform block has no name (null), no attributes and no inner
	 * blocks; the given HTML is both its innerHTML and its only
	 * innerContent entry.
	 *
	 * @internal
	 * @since 3.9.0
	 *
	 * @param string $inner_html HTML content of block.
	 * @return WP_Block_Parser_Block freeform block object.
	 */
	public function freeform( $inner_html ) {
		return new WP_Block_Parser_Block( null, array(), array(), $inner_html, array( $inner_html ) );
	}

	/**
	 * Pushes a length of text from the input document
	 * to the output list as a freeform block.
	 *
	 * Nothing is pushed when the resulting length is zero.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $length how many bytes of document text to output; any
	 *                         falsy value means "everything from the current
	 *                         offset to the end of the document".
	 */
	public function add_freeform( $length = null ) {
		$length = $length ? $length : strlen( $this->document ) - $this->offset;

		if ( 0 === $length ) {
			return;
		}

		$this->output[] = (array) $this->freeform( substr( $this->document, $this->offset, $length ) );
	}

	/**
	 * Given a block structure from memory pushes
	 * a new block to the output list.
	 *
	 * The block is appended to the innerBlocks of the frame on top of
	 * the stack, preceded by any HTML found between the parent's previous
	 * offset and the start of this block. A null placeholder is added to
	 * the parent's innerContent to mark where the inner block sits.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param WP_Block_Parser_Block $block        The block to add to the output.
	 * @param int                   $token_start  Byte offset into the document where the first token for the block starts.
	 * @param int                   $token_length Byte length of entire block from start of opening token to end of closing token.
	 * @param int|null              $last_offset  Last byte offset into document if continuing from earlier output.
	 */
	public function add_inner_block( WP_Block_Parser_Block $block, $token_start, $token_length, $last_offset = null ) {
		$parent                       = $this->stack[ count( $this->stack ) - 1 ];
		$parent->block->innerBlocks[] = (array) $block;
		$html                         = substr( $this->document, $parent->prev_offset, $token_start - $parent->prev_offset );

		/*
		 * Compare against the empty string explicitly: empty() treats the
		 * string "0" as empty and would silently drop that content. The
		 * is_string() guard covers substr() returning false on PHP < 8.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$parent->block->innerHTML     .= $html;
			$parent->block->innerContent[] = $html;
		}

		$parent->block->innerContent[] = null;
		$parent->prev_offset           = $last_offset ? $last_offset : $token_start + $token_length;
	}

	/**
	 * Pushes the top block from the parsing stack to the output list.
	 *
	 * Any HTML between the frame's previous offset and $end_offset is
	 * appended to the block first. If the frame recorded leading HTML
	 * before its opening token, that HTML is emitted as a freeform block
	 * ahead of the block itself.
	 *
	 * @internal
	 * @since 5.0.0
	 * @param int|null $end_offset byte offset into document for where we should stop sending text output as HTML;
	 *                             null means "up to the end of the document".
	 */
	public function add_block_from_stack( $end_offset = null ) {
		$stack_top   = array_pop( $this->stack );
		$prev_offset = $stack_top->prev_offset;

		$html = isset( $end_offset )
			? substr( $this->document, $prev_offset, $end_offset - $prev_offset )
			: substr( $this->document, $prev_offset );

		/*
		 * Compare against the empty string explicitly: empty() treats the
		 * string "0" as empty and would silently drop that content. The
		 * is_string() guard covers substr() returning false on PHP < 8.
		 */
		if ( is_string( $html ) && '' !== $html ) {
			$stack_top->block->innerHTML     .= $html;
			$stack_top->block->innerContent[] = $html;
		}

		if ( isset( $stack_top->leading_html_start ) ) {
			$this->output[] = (array) $this->freeform(
				substr(
					$this->document,
					$stack_top->leading_html_start,
					$stack_top->token_start - $stack_top->leading_html_start
				)
			);
		}

		$this->output[] = (array) $stack_top->block;
	}
}
|
21
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
391 |
|
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
392 |
/** |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
393 |
* WP_Block_Parser_Block class. |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
394 |
* |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
395 |
* Required for backward compatibility in WordPress Core. |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
396 |
*/ |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
397 |
require_once __DIR__ . '/class-wp-block-parser-block.php'; |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
398 |
|
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
399 |
/** |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
400 |
* WP_Block_Parser_Frame class. |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
401 |
* |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
402 |
* Required for backward compatibility in WordPress Core. |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
403 |
*/ |
48c4eec2b7e6
Add CLAUDE.md documentation and sync WordPress core files
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
404 |
require_once __DIR__ . '/class-wp-block-parser-frame.php'; |