changeset 22 | 8c2e4d02f4ef |
parent 21 | 48c4eec2b7e6 |
21:48c4eec2b7e6 | 22:8c2e4d02f4ef |
---|---|
95 * |
95 * |
96 * If any unsupported element appears in the HTML input the HTML Processor |
96 * If any unsupported element appears in the HTML input the HTML Processor |
97 * will abort early and stop all processing. This draconian measure ensures |
97 * will abort early and stop all processing. This draconian measure ensures |
98 * that the HTML Processor won't break any HTML it doesn't fully understand. |
98 * that the HTML Processor won't break any HTML it doesn't fully understand. |
99 * |
99 * |
100 * The following list specifies the HTML tags that _are_ supported: |
100 * The HTML Processor supports all elements other than a specific set: |
101 * |
101 * |
102 * - Containers: ADDRESS, BLOCKQUOTE, DETAILS, DIALOG, DIV, FOOTER, HEADER, MAIN, MENU, SPAN, SUMMARY. |
102 * - Any element inside a TABLE. |
103 * - Custom elements: All custom elements are supported. :) |
103 * - Any element inside foreign content, including SVG and MATH. |
104 * - Form elements: BUTTON, DATALIST, FIELDSET, INPUT, LABEL, LEGEND, METER, PROGRESS, SEARCH. |
104 * - Any element outside the IN BODY insertion mode, e.g. doctype declarations, meta, links. |
105 * - Formatting elements: B, BIG, CODE, EM, FONT, I, PRE, SMALL, STRIKE, STRONG, TT, U, WBR. |
|
106 * - Heading elements: H1, H2, H3, H4, H5, H6, HGROUP. |
|
107 * - Links: A. |
|
108 * - Lists: DD, DL, DT, LI, OL, UL. |
|
109 * - Media elements: AUDIO, CANVAS, EMBED, FIGCAPTION, FIGURE, IMG, MAP, PICTURE, SOURCE, TRACK, VIDEO. |
|
110 * - Paragraph: BR, P. |
|
111 * - Phrasing elements: ABBR, AREA, BDI, BDO, CITE, DATA, DEL, DFN, INS, MARK, OUTPUT, Q, SAMP, SUB, SUP, TIME, VAR. |
|
112 * - Sectioning elements: ARTICLE, ASIDE, HR, NAV, SECTION. |
|
113 * - Templating elements: SLOT. |
|
114 * - Text decoration: RUBY. |
|
115 * - Deprecated elements: ACRONYM, BLINK, CENTER, DIR, ISINDEX, KEYGEN, LISTING, MULTICOL, NEXTID, PARAM, SPACER. |
|
116 * |
105 * |
117 * ### Supported markup |
106 * ### Supported markup |
118 * |
107 * |
119 * Some kinds of non-normative HTML involve reconstruction of formatting elements and |
108 * Some kinds of non-normative HTML involve reconstruction of formatting elements and |
120 * re-parenting of mis-nested elements. For example, a DIV tag found inside a TABLE |
109 * re-parenting of mis-nested elements. For example, a DIV tag found inside a TABLE |
121 * may in fact belong _before_ the table in the DOM. If the HTML Processor encounters |
110 * may in fact belong _before_ the table in the DOM. If the HTML Processor encounters |
122 * such a case it will stop processing. |
111 * such a case it will stop processing. |
123 * |
112 * |
124 * The following list specifies HTML markup that _is_ supported: |
113 * The following list illustrates some common examples of unexpected HTML inputs that |
114 * the HTML Processor properly parses and represents: |
|
125 * |
115 * |
126 * - Markup involving only those tags listed above. |
116 * - HTML with optional tags omitted, e.g. `<p>one<p>two`. |
127 * - Fully-balanced and non-overlapping tags. |
117 * - HTML with unexpected tag closers, e.g. `<p>one </span> more</p>`. |
128 * - HTML with unexpected tag closers. |
118 * - Non-void tags with self-closing flag, e.g. `<div/>the DIV is still open.</div>`. |
129 * - Some unbalanced or overlapping tags. |
119 * - Heading elements which close open heading elements of another level, e.g. `<h1>Closed by </h2>`. |
130 * - P tags after unclosed P tags. |
120 * - Elements containing text that looks like other tags but isn't, e.g. `<title>The <img> is plaintext</title>`. |
131 * - BUTTON tags after unclosed BUTTON tags. |
121 * - SCRIPT and STYLE tags containing text that looks like HTML but isn't, e.g. `<script>document.write('<p>Hi</p>');</script>`. |
132 * - A tags after unclosed A tags that don't involve any active formatting elements. |
122 * - SCRIPT content which has been escaped, e.g. `<script><!-- document.write('<script>console.log("hi")</script>') --></script>`. |
123 * |
|
124 * ### Unsupported Features |
|
125 * |
|
126 * This parser does not report parse errors. |
|
127 * |
|
128 * Normally, when additional HTML or BODY tags are encountered in a document, if there |
|
129 * are any additional attributes on them that aren't found on the previous elements, |
|
130 * the existing HTML and BODY elements adopt those missing attribute values. This |
|
131 * parser does not add those additional attributes. |
|
132 * |
|
133 * In certain situations, elements are moved to a different part of the document in |
|
134 * a process called "adoption" and "fostering." Because the nodes move to a location |
|
135 * in the document that the parser had already processed, this parser does not support |
|
136 * these situations and will bail. |
|
133 * |
137 * |
134 * @since 6.4.0 |
138 * @since 6.4.0 |
135 * |
139 * |
136 * @see WP_HTML_Tag_Processor |
140 * @see WP_HTML_Tag_Processor |
137 * @see https://html.spec.whatwg.org/ |
141 * @see https://html.spec.whatwg.org/ |
157 * |
161 * |
158 * @since 6.4.0 |
162 * @since 6.4.0 |
159 * |
163 * |
160 * @var WP_HTML_Processor_State |
164 * @var WP_HTML_Processor_State |
161 */ |
165 */ |
162 private $state = null; |
166 private $state; |
163 |
167 |
164 /** |
168 /** |
165 * Used to create unique bookmark names. |
169 * Used to create unique bookmark names. |
166 * |
170 * |
167 * This class sets a bookmark for every tag in the HTML document that it encounters. |
171 * This class sets a bookmark for every tag in the HTML document that it encounters. |
187 * @var string|null |
191 * @var string|null |
188 */ |
192 */ |
189 private $last_error = null; |
193 private $last_error = null; |
190 |
194 |
191 /** |
195 /** |
196 * Stores context for why the parser bailed on unsupported HTML, if it did. |
|
197 * |
|
198 * @see self::get_unsupported_exception |
|
199 * |
|
200 * @since 6.7.0 |
|
201 * |
|
202 * @var WP_HTML_Unsupported_Exception|null |
|
203 */ |
|
204 private $unsupported_exception = null; |
|
205 |
|
206 /** |
|
192 * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance. |
207 * Releases a bookmark when PHP garbage-collects its wrapping WP_HTML_Token instance. |
193 * |
208 * |
194 * This function is created inside the class constructor so that it can be passed to |
209 * This function is created inside the class constructor so that it can be passed to |
195 * the stack of open elements and the stack of active formatting elements without |
210 * the stack of open elements and the stack of active formatting elements without |
196 * exposing it as a public method on the class. |
211 * exposing it as a public method on the class. |
197 * |
212 * |
198 * @since 6.4.0 |
213 * @since 6.4.0 |
199 * |
214 * |
200 * @var closure |
215 * @var Closure|null |
201 */ |
216 */ |
202 private $release_internal_bookmark_on_destruct = null; |
217 private $release_internal_bookmark_on_destruct = null; |
203 |
218 |
204 /** |
219 /** |
205 * Stores stack events which arise during parsing of the |
220 * Stores stack events which arise during parsing of the |
208 * @since 6.6.0 |
223 * @since 6.6.0 |
209 * |
224 * |
210 * @var WP_HTML_Stack_Event[] |
225 * @var WP_HTML_Stack_Event[] |
211 */ |
226 */ |
212 private $element_queue = array(); |
227 private $element_queue = array(); |
228 |
|
229 /** |
|
230 * Stores the current breadcrumbs. |
|
231 * |
|
232 * @since 6.7.0 |
|
233 * |
|
234 * @var string[] |
|
235 */ |
|
236 private $breadcrumbs = array(); |
|
213 |
237 |
214 /** |
238 /** |
215 * Current stack event, if set, representing a matched token. |
239 * Current stack event, if set, representing a matched token. |
216 * |
240 * |
217 * Because the parser may internally point to a place further along in a document |
241 * Because the parser may internally point to a place further along in a document |
219 * appeared while scanning the HTML document), this will point at the "current" node |
243 * appeared while scanning the HTML document), this will point at the "current" node |
220 * being processed. It comes from the front of the element queue. |
244 * being processed. It comes from the front of the element queue. |
221 * |
245 * |
222 * @since 6.6.0 |
246 * @since 6.6.0 |
223 * |
247 * |
224 * @var ?WP_HTML_Stack_Event |
248 * @var WP_HTML_Stack_Event|null |
225 */ |
249 */ |
226 private $current_element = null; |
250 private $current_element = null; |
227 |
251 |
228 /** |
252 /** |
229 * Context node if created as a fragment parser. |
253 * Context node if created as a fragment parser. |
230 * |
254 * |
231 * @var ?WP_HTML_Token |
255 * @var WP_HTML_Token|null |
232 */ |
256 */ |
233 private $context_node = null; |
257 private $context_node = null; |
234 |
|
235 /** |
|
236 * Whether the parser has yet processed the context node, |
|
237 * if created as a fragment parser. |
|
238 * |
|
239 * The context node will be initially pushed onto the stack of open elements, |
|
240 * but when created as a fragment parser, this context element (and the implicit |
|
241 * HTML document node above it) should not be exposed as a matched token or node. |
|
242 * |
|
243 * This boolean indicates whether the processor should skip over the current |
|
244 * node in its initial search for the first node created from the input HTML. |
|
245 * |
|
246 * @var bool |
|
247 */ |
|
248 private $has_seen_context_node = false; |
|
249 |
258 |
250 /* |
259 /* |
251 * Public Interface Functions |
260 * Public Interface Functions |
252 */ |
261 */ |
253 |
262 |
286 public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) { |
295 public static function create_fragment( $html, $context = '<body>', $encoding = 'UTF-8' ) { |
287 if ( '<body>' !== $context || 'UTF-8' !== $encoding ) { |
296 if ( '<body>' !== $context || 'UTF-8' !== $encoding ) { |
288 return null; |
297 return null; |
289 } |
298 } |
290 |
299 |
291 $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); |
300 $context_processor = static::create_full_parser( "<!DOCTYPE html>{$context}", $encoding ); |
292 $processor->state->context_node = array( 'BODY', array() ); |
301 if ( null === $context_processor ) { |
293 $processor->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; |
302 return null; |
294 |
303 } |
295 // @todo Create "fake" bookmarks for non-existent but implied nodes. |
304 |
296 $processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); |
305 while ( $context_processor->next_tag() ) { |
297 $processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); |
306 if ( ! $context_processor->is_virtual() ) { |
298 |
307 $context_processor->set_bookmark( 'final_node' ); |
299 $processor->state->stack_of_open_elements->push( |
308 } |
300 new WP_HTML_Token( |
309 } |
301 'root-node', |
310 |
302 'HTML', |
311 if ( |
303 false |
312 ! $context_processor->has_bookmark( 'final_node' ) || |
304 ) |
313 ! $context_processor->seek( 'final_node' ) |
305 ); |
314 ) { |
306 |
315 _doing_it_wrong( __METHOD__, __( 'No valid context element was detected.' ), '6.8.0' ); |
307 $context_node = new WP_HTML_Token( |
316 return null; |
308 'context-node', |
317 } |
309 $processor->state->context_node[0], |
318 |
310 false |
319 return $context_processor->create_fragment_at_current_node( $html ); |
311 ); |
320 } |
312 |
321 |
313 $processor->state->stack_of_open_elements->push( $context_node ); |
322 /** |
314 $processor->context_node = $context_node; |
323 * Creates an HTML processor in the full parsing mode. |
324 * |
|
325 * It's likely that a fragment parser is more appropriate, unless sending an |
|
326 * entire HTML document from start to finish. Consider a fragment parser with |
|
327 * a context node of `<body>`. |
|
328 * |
|
329 * UTF-8 is the only allowed encoding. If working with a document that |
|
330 * isn't UTF-8, first convert the document to UTF-8, then pass in the |
|
331 * converted HTML. |
|
332 * |
|
333 * @param string $html Input HTML document to process. |
|
334 * @param string|null $known_definite_encoding Optional. If provided, specifies the charset used |
|
335 * in the input byte stream. Currently must be UTF-8. |
|
336 * @return static|null The created processor if successful, otherwise null. |
|
337 */ |
|
338 public static function create_full_parser( $html, $known_definite_encoding = 'UTF-8' ) { |
|
339 if ( 'UTF-8' !== $known_definite_encoding ) { |
|
340 return null; |
|
341 } |
|
342 |
|
343 $processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); |
|
344 $processor->state->encoding = $known_definite_encoding; |
|
345 $processor->state->encoding_confidence = 'certain'; |
|
315 |
346 |
316 return $processor; |
347 return $processor; |
317 } |
348 } |
318 |
349 |
319 /** |
350 /** |
346 } |
377 } |
347 |
378 |
348 $this->state = new WP_HTML_Processor_State(); |
379 $this->state = new WP_HTML_Processor_State(); |
349 |
380 |
350 $this->state->stack_of_open_elements->set_push_handler( |
381 $this->state->stack_of_open_elements->set_push_handler( |
351 function ( WP_HTML_Token $token ) { |
382 function ( WP_HTML_Token $token ): void { |
352 $is_virtual = ! isset( $this->state->current_token ) || $this->is_tag_closer(); |
383 $is_virtual = ! isset( $this->state->current_token ) || $this->is_tag_closer(); |
353 $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; |
384 $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; |
354 $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; |
385 $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; |
355 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance ); |
386 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::PUSH, $provenance ); |
387 |
|
388 $this->change_parsing_namespace( $token->integration_node_type ? 'html' : $token->namespace ); |
|
356 } |
389 } |
357 ); |
390 ); |
358 |
391 |
359 $this->state->stack_of_open_elements->set_pop_handler( |
392 $this->state->stack_of_open_elements->set_pop_handler( |
360 function ( WP_HTML_Token $token ) { |
393 function ( WP_HTML_Token $token ): void { |
361 $is_virtual = ! isset( $this->state->current_token ) || ! $this->is_tag_closer(); |
394 $is_virtual = ! isset( $this->state->current_token ) || ! $this->is_tag_closer(); |
362 $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; |
395 $same_node = isset( $this->state->current_token ) && $token->node_name === $this->state->current_token->node_name; |
363 $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; |
396 $provenance = ( ! $same_node || $is_virtual ) ? 'virtual' : 'real'; |
364 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance ); |
397 $this->element_queue[] = new WP_HTML_Stack_Event( $token, WP_HTML_Stack_Event::POP, $provenance ); |
398 |
|
399 $adjusted_current_node = $this->get_adjusted_current_node(); |
|
400 |
|
401 if ( $adjusted_current_node ) { |
|
402 $this->change_parsing_namespace( $adjusted_current_node->integration_node_type ? 'html' : $adjusted_current_node->namespace ); |
|
403 } else { |
|
404 $this->change_parsing_namespace( 'html' ); |
|
405 } |
|
365 } |
406 } |
366 ); |
407 ); |
367 |
408 |
368 /* |
409 /* |
369 * Create this wrapper so that it's possible to pass |
410 * Create this wrapper so that it's possible to pass |
370 * a private method into WP_HTML_Token classes without |
411 * a private method into WP_HTML_Token classes without |
371 * exposing it to any public API. |
412 * exposing it to any public API. |
372 */ |
413 */ |
373 $this->release_internal_bookmark_on_destruct = function ( $name ) { |
414 $this->release_internal_bookmark_on_destruct = function ( string $name ): void { |
374 parent::release_bookmark( $name ); |
415 parent::release_bookmark( $name ); |
375 }; |
416 }; |
417 } |
|
418 |
|
419 /** |
|
420 * Creates a fragment processor at the current node. |
|
421 * |
|
422 * HTML Fragment parsing always happens with a context node. HTML Fragment Processors can be |
|
423 * instantiated with a `BODY` context node via `WP_HTML_Processor::create_fragment( $html )`. |
|
424 * |
|
425 * The context node may impact how a fragment of HTML is parsed. For example, consider the HTML |
|
426 * fragment `<td />Inside TD?</td>`. |
|
427 * |
|
428 * A BODY context node will produce the following tree: |
|
429 * |
|
430 * └─#text Inside TD? |
|
431 * |
|
432 * Notice that the `<td>` tags are completely ignored. |
|
433 * |
|
434 * Compare that with an SVG context node that produces the following tree: |
|
435 * |
|
436 * ├─svg:td |
|
437 * └─#text Inside TD? |
|
438 * |
|
439 * Here, a `td` node in the `svg` namespace is created, and its self-closing flag is respected. |
|
440 * This is a peculiarity of parsing HTML in foreign content like SVG. |
|
441 * |
|
442 * Finally, consider the tree produced with a TABLE context node: |
|
443 * |
|
444 * └─TBODY |
|
445 * └─TR |
|
446 * └─TD |
|
447 * └─#text Inside TD? |
|
448 * |
|
449 * These examples demonstrate how important the context node may be when processing an HTML |
|
450 * fragment. Special care must be taken when processing fragments that are expected to appear |
|
451 * in specific contexts. SVG and TABLE are good examples, but there are others. |
|
452 * |
|
453 * @see https://html.spec.whatwg.org/multipage/parsing.html#html-fragment-parsing-algorithm |
|
454 * |
|
455 * @since 6.8.0 |
|
456 * |
|
457 * @param string $html Input HTML fragment to process. |
|
458 * @return static|null The created processor if successful, otherwise null. |
|
459 */ |
|
460 private function create_fragment_at_current_node( string $html ) { |
|
461 if ( $this->get_token_type() !== '#tag' || $this->is_tag_closer() ) { |
|
462 _doing_it_wrong( |
|
463 __METHOD__, |
|
464 __( 'The context element must be a start tag.' ), |
|
465 '6.8.0' |
|
466 ); |
|
467 return null; |
|
468 } |
|
469 |
|
470 $tag_name = $this->current_element->token->node_name; |
|
471 $namespace = $this->current_element->token->namespace; |
|
472 |
|
473 if ( 'html' === $namespace && self::is_void( $tag_name ) ) { |
|
474 _doing_it_wrong( |
|
475 __METHOD__, |
|
476 sprintf( |
|
477 // translators: %s: A tag name like INPUT or BR. |
|
478 __( 'The context element cannot be a void element, found "%s".' ), |
|
479 $tag_name |
|
480 ), |
|
481 '6.8.0' |
|
482 ); |
|
483 return null; |
|
484 } |
|
485 |
|
486 /* |
|
487 * Prevent creating fragments at nodes that require a special tokenizer state. |
|
488 * This is unsupported by the HTML Processor. |
|
489 */ |
|
490 if ( |
|
491 'html' === $namespace && |
|
492 in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP', 'PLAINTEXT' ), true ) |
|
493 ) { |
|
494 _doing_it_wrong( |
|
495 __METHOD__, |
|
496 sprintf( |
|
497 // translators: %s: A tag name like IFRAME or TEXTAREA. |
|
498 __( 'The context element "%s" is not supported.' ), |
|
499 $tag_name |
|
500 ), |
|
501 '6.8.0' |
|
502 ); |
|
503 return null; |
|
504 } |
|
505 |
|
506 $fragment_processor = new static( $html, self::CONSTRUCTOR_UNLOCK_CODE ); |
|
507 |
|
508 $fragment_processor->compat_mode = $this->compat_mode; |
|
509 |
|
510 // @todo Create "fake" bookmarks for non-existent but implied nodes. |
|
511 $fragment_processor->bookmarks['root-node'] = new WP_HTML_Span( 0, 0 ); |
|
512 $root_node = new WP_HTML_Token( |
|
513 'root-node', |
|
514 'HTML', |
|
515 false |
|
516 ); |
|
517 $fragment_processor->state->stack_of_open_elements->push( $root_node ); |
|
518 |
|
519 $fragment_processor->bookmarks['context-node'] = new WP_HTML_Span( 0, 0 ); |
|
520 $fragment_processor->context_node = clone $this->current_element->token; |
|
521 $fragment_processor->context_node->bookmark_name = 'context-node'; |
|
522 $fragment_processor->context_node->on_destroy = null; |
|
523 |
|
524 $fragment_processor->breadcrumbs = array( 'HTML', $fragment_processor->context_node->node_name ); |
|
525 |
|
526 if ( 'TEMPLATE' === $fragment_processor->context_node->node_name ) { |
|
527 $fragment_processor->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; |
|
528 } |
|
529 |
|
530 $fragment_processor->reset_insertion_mode_appropriately(); |
|
531 |
|
532 /* |
|
533 * > Set the parser's form element pointer to the nearest node to the context element that |
|
534 * > is a form element (going straight up the ancestor chain, and including the element |
|
535 * > itself, if it is a form element), if any. (If there is no such form element, the |
|
536 * > form element pointer keeps its initial value, null.) |
|
537 */ |
|
538 foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) { |
|
539 if ( 'FORM' === $element->node_name && 'html' === $element->namespace ) { |
|
540 $fragment_processor->state->form_element = clone $element; |
|
541 $fragment_processor->state->form_element->bookmark_name = null; |
|
542 $fragment_processor->state->form_element->on_destroy = null; |
|
543 break; |
|
544 } |
|
545 } |
|
546 |
|
547 $fragment_processor->state->encoding_confidence = 'irrelevant'; |
|
548 |
|
549 /* |
|
550 * Update the parsing namespace near the end of the process. |
|
551 * This is important so that any push/pop from the stack of open |
|
552 * elements does not change the parsing namespace. |
|
553 */ |
|
554 $fragment_processor->change_parsing_namespace( |
|
555 $this->current_element->token->integration_node_type ? 'html' : $namespace |
|
556 ); |
|
557 |
|
558 return $fragment_processor; |
|
559 } |
|
560 |
|
561 /** |
|
562 * Stops the parser and terminates its execution when encountering unsupported markup. |
|
563 * |
|
564 * @throws WP_HTML_Unsupported_Exception Halts execution of the parser. |
|
565 * |
|
566 * @since 6.7.0 |
|
567 * |
|
568 * @param string $message Explains support is missing in order to parse the current node. |
|
569 */ |
|
570 private function bail( string $message ) { |
|
571 $here = $this->bookmarks[ $this->state->current_token->bookmark_name ]; |
|
572 $token = substr( $this->html, $here->start, $here->length ); |
|
573 |
|
574 $open_elements = array(); |
|
575 foreach ( $this->state->stack_of_open_elements->stack as $item ) { |
|
576 $open_elements[] = $item->node_name; |
|
577 } |
|
578 |
|
579 $active_formats = array(); |
|
580 foreach ( $this->state->active_formatting_elements->walk_down() as $item ) { |
|
581 $active_formats[] = $item->node_name; |
|
582 } |
|
583 |
|
584 $this->last_error = self::ERROR_UNSUPPORTED; |
|
585 |
|
586 $this->unsupported_exception = new WP_HTML_Unsupported_Exception( |
|
587 $message, |
|
588 $this->state->current_token->node_name, |
|
589 $here->start, |
|
590 $token, |
|
591 $open_elements, |
|
592 $active_formats |
|
593 ); |
|
594 |
|
595 throw $this->unsupported_exception; |
|
376 } |
596 } |
377 |
597 |
378 /** |
598 /** |
379 * Returns the last error, if any. |
599 * Returns the last error, if any. |
380 * |
600 * |
396 * @see self::ERROR_UNSUPPORTED |
616 * @see self::ERROR_UNSUPPORTED |
397 * @see self::ERROR_EXCEEDED_MAX_BOOKMARKS |
617 * @see self::ERROR_EXCEEDED_MAX_BOOKMARKS |
398 * |
618 * |
399 * @return string|null The last error, if one exists, otherwise null. |
619 * @return string|null The last error, if one exists, otherwise null. |
400 */ |
620 */ |
401 public function get_last_error() { |
621 public function get_last_error(): ?string { |
402 return $this->last_error; |
622 return $this->last_error; |
623 } |
|
624 |
|
625 /** |
|
626 * Returns context for why the parser aborted due to unsupported HTML, if it did. |
|
627 * |
|
628 * This is meant for debugging purposes, not for production use. |
|
629 * |
|
630 * @since 6.7.0 |
|
631 * |
|
632 * @see self::$unsupported_exception |
|
633 * |
|
634 * @return WP_HTML_Unsupported_Exception|null |
|
635 */ |
|
636 public function get_unsupported_exception() { |
|
637 return $this->unsupported_exception; |
|
403 } |
638 } |
404 |
639 |
405 /** |
640 /** |
406 * Finds the next tag matching the $query. |
641 * Finds the next tag matching the $query. |
407 * |
642 * |
424 * @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. |
659 * @type string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. |
425 * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. |
660 * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. |
426 * } |
661 * } |
427 * @return bool Whether a tag was matched. |
662 * @return bool Whether a tag was matched. |
428 */ |
663 */ |
429 public function next_tag( $query = null ) { |
664 public function next_tag( $query = null ): bool { |
430 $visit_closers = isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers']; |
665 $visit_closers = isset( $query['tag_closers'] ) && 'visit' === $query['tag_closers']; |
431 |
666 |
432 if ( null === $query ) { |
667 if ( null === $query ) { |
433 while ( $this->next_token() ) { |
668 while ( $this->next_token() ) { |
434 if ( '#tag' !== $this->get_token_type() ) { |
669 if ( '#tag' !== $this->get_token_type() ) { |
454 '6.4.0' |
689 '6.4.0' |
455 ); |
690 ); |
456 return false; |
691 return false; |
457 } |
692 } |
458 |
693 |
694 if ( isset( $query['tag_name'] ) ) { |
|
695 $query['tag_name'] = strtoupper( $query['tag_name'] ); |
|
696 } |
|
697 |
|
459 $needs_class = ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) |
698 $needs_class = ( isset( $query['class_name'] ) && is_string( $query['class_name'] ) ) |
460 ? $query['class_name'] |
699 ? $query['class_name'] |
461 : null; |
700 : null; |
462 |
701 |
463 if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) { |
702 if ( ! ( array_key_exists( 'breadcrumbs', $query ) && is_array( $query['breadcrumbs'] ) ) ) { |
464 while ( $this->next_token() ) { |
703 while ( $this->next_token() ) { |
465 if ( '#tag' !== $this->get_token_type() ) { |
704 if ( '#tag' !== $this->get_token_type() ) { |
466 continue; |
705 continue; |
467 } |
706 } |
468 |
707 |
708 if ( isset( $query['tag_name'] ) && $query['tag_name'] !== $this->get_token_name() ) { |
|
709 continue; |
|
710 } |
|
711 |
|
469 if ( isset( $needs_class ) && ! $this->has_class( $needs_class ) ) { |
712 if ( isset( $needs_class ) && ! $this->has_class( $needs_class ) ) { |
470 continue; |
713 continue; |
471 } |
714 } |
472 |
715 |
473 if ( ! $this->is_tag_closer() || $visit_closers ) { |
716 if ( ! $this->is_tag_closer() || $visit_closers ) { |
497 |
740 |
498 return false; |
741 return false; |
499 } |
742 } |
500 |
743 |
501 /** |
744 /** |
502 * Ensures internal accounting is maintained for HTML semantic rules while |
745 * Finds the next token in the HTML document. |
503 * the underlying Tag Processor class is seeking to a bookmark. |
|
504 * |
746 * |
505 * This doesn't currently have a way to represent non-tags and doesn't process |
747 * This doesn't currently have a way to represent non-tags and doesn't process |
506 * semantic rules for text nodes. For access to the raw tokens consider using |
748 * semantic rules for text nodes. For access to the raw tokens consider using |
507 * WP_HTML_Tag_Processor instead. |
749 * WP_HTML_Tag_Processor instead. |
508 * |
750 * |
509 * @since 6.5.0 Added for internal support; do not use. |
751 * @since 6.5.0 Added for internal support; do not use. |
752 * @since 6.7.2 Refactored so subclasses may extend. |
|
753 * |
|
754 * @return bool Whether a token was parsed. |
|
755 */ |
|
756 public function next_token(): bool { |
|
757 return $this->next_visitable_token(); |
|
758 } |
|
759 |
|
760 /** |
|
761 * Ensures internal accounting is maintained for HTML semantic rules while |
|
762 * the underlying Tag Processor class is seeking to a bookmark. |
|
763 * |
|
764 * This doesn't currently have a way to represent non-tags and doesn't process |
|
765 * semantic rules for text nodes. For access to the raw tokens consider using |
|
766 * WP_HTML_Tag_Processor instead. |
|
767 * |
|
768 * Note that this method may call itself recursively. This is why it is not |
|
769 * implemented as {@see WP_HTML_Processor::next_token()}, which instead calls |
|
770 * this method similarly to how {@see WP_HTML_Tag_Processor::next_token()} |
|
771 * calls the {@see WP_HTML_Tag_Processor::base_class_next_token()} method. |
|
772 * |
|
773 * @since 6.7.2 Added for internal support. |
|
510 * |
774 * |
511 * @access private |
775 * @access private |
512 * |
776 * |
513 * @return bool |
777 * @return bool |
514 */ |
778 */ |
515 public function next_token() { |
779 private function next_visitable_token(): bool { |
516 $this->current_element = null; |
780 $this->current_element = null; |
517 |
781 |
518 if ( isset( $this->last_error ) ) { |
782 if ( isset( $this->last_error ) ) { |
519 return false; |
783 return false; |
520 } |
784 } |
521 |
785 |
522 if ( 'done' !== $this->has_seen_context_node && 0 === count( $this->element_queue ) && ! $this->step() ) { |
786 /* |
523 while ( 'context-node' !== $this->state->stack_of_open_elements->current_node()->bookmark_name && $this->state->stack_of_open_elements->pop() ) { |
787 * Prime the events if there are none. |
788 * |
|
789 * @todo In some cases, probably related to the adoption agency |
|
790 * algorithm, this call to step() doesn't create any new |
|
791 * events. Calling it again creates them. Figure out why |
|
792 * this is and if it's inherent or if it's a bug. Looping |
|
793 * until there are events or until there are no more |
|
794 * tokens works in the meantime and isn't obviously wrong. |
|
795 */ |
|
796 if ( empty( $this->element_queue ) && $this->step() ) { |
|
797 return $this->next_visitable_token(); |
|
798 } |
|
799 |
|
800 // Process the next event on the queue. |
|
801 $this->current_element = array_shift( $this->element_queue ); |
|
802 if ( ! isset( $this->current_element ) ) { |
|
803 // There are no tokens left, so close all remaining open elements. |
|
804 while ( $this->state->stack_of_open_elements->pop() ) { |
|
524 continue; |
805 continue; |
525 } |
806 } |
526 $this->has_seen_context_node = 'done'; |
807 |
527 return $this->next_token(); |
808 return empty( $this->element_queue ) ? false : $this->next_visitable_token(); |
528 } |
809 } |
529 |
810 |
530 $this->current_element = array_shift( $this->element_queue ); |
811 $is_pop = WP_HTML_Stack_Event::POP === $this->current_element->operation; |
531 while ( isset( $this->context_node ) && ! $this->has_seen_context_node ) { |
812 |
532 if ( isset( $this->current_element ) ) { |
813 /* |
533 if ( $this->context_node === $this->current_element->token && WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { |
814 * The root node only exists in the fragment parser, and closing it |
534 $this->has_seen_context_node = true; |
815 * indicates that the parse is complete. Stop before popping it from |
535 return $this->next_token(); |
816 * the breadcrumbs. |
536 } |
817 */ |
537 } |
818 if ( 'root-node' === $this->current_element->token->bookmark_name ) { |
538 $this->current_element = array_shift( $this->element_queue ); |
819 return $this->next_visitable_token(); |
539 } |
820 } |
540 |
821 |
541 if ( ! isset( $this->current_element ) ) { |
822 // Adjust the breadcrumbs for this event. |
542 if ( 'done' === $this->has_seen_context_node ) { |
823 if ( $is_pop ) { |
543 return false; |
824 array_pop( $this->breadcrumbs ); |
544 } else { |
825 } else { |
545 return $this->next_token(); |
826 $this->breadcrumbs[] = $this->current_element->token->node_name; |
546 } |
|
547 } |
|
548 |
|
549 if ( isset( $this->context_node ) && WP_HTML_Stack_Event::POP === $this->current_element->operation && $this->context_node === $this->current_element->token ) { |
|
550 $this->element_queue = array(); |
|
551 $this->current_element = null; |
|
552 return false; |
|
553 } |
827 } |
554 |
828 |
555 // Avoid sending close events for elements which don't expect a closing. |
829 // Avoid sending close events for elements which don't expect a closing. |
556 if ( |
830 if ( $is_pop && ! $this->expects_closer( $this->current_element->token ) ) { |
557 WP_HTML_Stack_Event::POP === $this->current_element->operation && |
831 return $this->next_visitable_token(); |
558 ! static::expects_closer( $this->current_element->token ) |
|
559 ) { |
|
560 return $this->next_token(); |
|
561 } |
832 } |
562 |
833 |
563 return true; |
834 return true; |
564 } |
835 } |
565 |
|
566 |
836 |
567 /** |
837 /** |
568 * Indicates if the current tag token is a tag closer. |
838 * Indicates if the current tag token is a tag closer. |
569 * |
839 * |
570 * Example: |
840 * Example: |
578 * |
848 * |
579 * @since 6.6.0 Subclassed for HTML Processor. |
849 * @since 6.6.0 Subclassed for HTML Processor. |
580 * |
850 * |
581 * @return bool Whether the current tag is a tag closer. |
851 * @return bool Whether the current tag is a tag closer. |
582 */ |
852 */ |
583 public function is_tag_closer() { |
853 public function is_tag_closer(): bool { |
584 return $this->is_virtual() |
854 return $this->is_virtual() |
585 ? ( WP_HTML_Stack_Event::POP === $this->current_element->operation && '#tag' === $this->get_token_type() ) |
855 ? ( WP_HTML_Stack_Event::POP === $this->current_element->operation && '#tag' === $this->get_token_type() ) |
586 : parent::is_tag_closer(); |
856 : parent::is_tag_closer(); |
587 } |
857 } |
588 |
858 |
592 * |
862 * |
593 * @since 6.6.0 |
863 * @since 6.6.0 |
594 * |
864 * |
595 * @return bool Whether the current token is virtual. |
865 * @return bool Whether the current token is virtual. |
596 */ |
866 */ |
597 private function is_virtual() { |
867 private function is_virtual(): bool { |
598 return ( |
868 return ( |
599 isset( $this->current_element->provenance ) && |
869 isset( $this->current_element->provenance ) && |
600 'virtual' === $this->current_element->provenance |
870 'virtual' === $this->current_element->provenance |
601 ); |
871 ); |
602 } |
872 } |
624 * |
894 * |
625 * @param string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. |
895 * @param string[] $breadcrumbs DOM sub-path at which element is found, e.g. `array( 'FIGURE', 'IMG' )`. |
626 * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. |
896 * May also contain the wildcard `*` which matches a single element, e.g. `array( 'SECTION', '*' )`. |
627 * @return bool Whether the currently-matched tag is found at the given nested structure. |
897 * @return bool Whether the currently-matched tag is found at the given nested structure. |
628 */ |
898 */ |
629 public function matches_breadcrumbs( $breadcrumbs ) { |
899 public function matches_breadcrumbs( $breadcrumbs ): bool { |
630 // Everything matches when there are zero constraints. |
900 // Everything matches when there are zero constraints. |
631 if ( 0 === count( $breadcrumbs ) ) { |
901 if ( 0 === count( $breadcrumbs ) ) { |
632 return true; |
902 return true; |
633 } |
903 } |
634 |
904 |
637 |
907 |
638 if ( '*' !== $crumb && $this->get_tag() !== strtoupper( $crumb ) ) { |
908 if ( '*' !== $crumb && $this->get_tag() !== strtoupper( $crumb ) ) { |
639 return false; |
909 return false; |
640 } |
910 } |
641 |
911 |
642 foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { |
912 for ( $i = count( $this->breadcrumbs ) - 1; $i >= 0; $i-- ) { |
913 $node = $this->breadcrumbs[ $i ]; |
|
643 $crumb = strtoupper( current( $breadcrumbs ) ); |
914 $crumb = strtoupper( current( $breadcrumbs ) ); |
644 |
915 |
645 if ( '*' !== $crumb && $node->node_name !== $crumb ) { |
916 if ( '*' !== $crumb && $node !== $crumb ) { |
646 return false; |
917 return false; |
647 } |
918 } |
648 |
919 |
649 if ( false === prev( $breadcrumbs ) ) { |
920 if ( false === prev( $breadcrumbs ) ) { |
650 return true; |
921 return true; |
665 * foreign content will also act just like a void tag, immediately |
936 * foreign content will also act just like a void tag, immediately |
666 * closing as soon as the processor advances to the next token. |
937 * closing as soon as the processor advances to the next token. |
667 * |
938 * |
668 * @since 6.6.0 |
939 * @since 6.6.0 |
669 * |
940 * |
670 * @todo When adding support for foreign content, ensure that |
941 * @param WP_HTML_Token|null $node Optional. Node to examine, if provided. |
671 * this returns false for self-closing elements in the |
942 * Default is to examine current node. |
672 * SVG and MathML namespace. |
943 * @return bool|null Whether to expect a closer for the currently-matched node, |
673 * |
944 * or `null` if not matched on any token. |
674 * @param ?WP_HTML_Token $node Node to examine instead of current node, if provided. |
945 */ |
675 * @return bool Whether to expect a closer for the currently-matched node, |
946 public function expects_closer( ?WP_HTML_Token $node = null ): ?bool { |
676 * or `null` if not matched on any token. |
|
677 */ |
|
678 public function expects_closer( $node = null ) { |
|
679 $token_name = $node->node_name ?? $this->get_token_name(); |
947 $token_name = $node->node_name ?? $this->get_token_name(); |
948 |
|
680 if ( ! isset( $token_name ) ) { |
949 if ( ! isset( $token_name ) ) { |
681 return null; |
950 return null; |
682 } |
951 } |
952 |
|
953 $token_namespace = $node->namespace ?? $this->get_namespace(); |
|
954 $token_has_self_closing = $node->has_self_closing_flag ?? $this->has_self_closing_flag(); |
|
683 |
955 |
684 return ! ( |
956 return ! ( |
685 // Comments, text nodes, and other atomic tokens. |
957 // Comments, text nodes, and other atomic tokens. |
686 '#' === $token_name[0] || |
958 '#' === $token_name[0] || |
687 // Doctype declarations. |
959 // Doctype declarations. |
688 'html' === $token_name || |
960 'html' === $token_name || |
689 // Void elements. |
961 // Void elements. |
690 self::is_void( $token_name ) || |
962 ( 'html' === $token_namespace && self::is_void( $token_name ) ) || |
691 // Special atomic elements. |
963 // Special atomic elements. |
692 in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) |
964 ( 'html' === $token_namespace && in_array( $token_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) || |
965 // Self-closing elements in foreign content. |
|
966 ( 'html' !== $token_namespace && $token_has_self_closing ) |
|
693 ); |
967 ); |
694 } |
968 } |
695 |
969 |
696 /** |
970 /** |
697 * Steps through the HTML document and stop at the next tag, if any. |
971 * Steps through the HTML document and stop at the next tag, if any. |
704 * @see self::REPROCESS_CURRENT_NODE |
978 * @see self::REPROCESS_CURRENT_NODE |
705 * |
979 * |
706 * @param string $node_to_process Whether to parse the next node or reprocess the current node. |
980 * @param string $node_to_process Whether to parse the next node or reprocess the current node. |
707 * @return bool Whether a tag was matched. |
981 * @return bool Whether a tag was matched. |
708 */ |
982 */ |
709 public function step( $node_to_process = self::PROCESS_NEXT_NODE ) { |
983 public function step( $node_to_process = self::PROCESS_NEXT_NODE ): bool { |
710 // Refuse to proceed if there was a previous error. |
984 // Refuse to proceed if there was a previous error. |
711 if ( null !== $this->last_error ) { |
985 if ( null !== $this->last_error ) { |
712 return false; |
986 return false; |
713 } |
987 } |
714 |
988 |
719 * stack-based operations such as "navigate to parent node" or checking |
993 * stack-based operations such as "navigate to parent node" or checking |
720 * on an element's breadcrumbs. |
994 * on an element's breadcrumbs. |
721 * |
995 * |
722 * When moving on to the next node, therefore, if the bottom-most element |
996 * When moving on to the next node, therefore, if the bottom-most element |
723 * on the stack is a void element, it must be closed. |
997 * on the stack is a void element, it must be closed. |
724 * |
|
725 * @todo Once self-closing foreign elements and BGSOUND are supported, |
|
726 * they must also be implicitly closed here too. BGSOUND is |
|
727 * special since it's only self-closing if the self-closing flag |
|
728 * is provided in the opening tag, otherwise it expects a tag closer. |
|
729 */ |
998 */ |
730 $top_node = $this->state->stack_of_open_elements->current_node(); |
999 $top_node = $this->state->stack_of_open_elements->current_node(); |
731 if ( isset( $top_node ) && ! static::expects_closer( $top_node ) ) { |
1000 if ( isset( $top_node ) && ! $this->expects_closer( $top_node ) ) { |
732 $this->state->stack_of_open_elements->pop(); |
1001 $this->state->stack_of_open_elements->pop(); |
733 } |
1002 } |
734 } |
1003 } |
735 |
1004 |
736 if ( self::PROCESS_NEXT_NODE === $node_to_process ) { |
1005 if ( self::PROCESS_NEXT_NODE === $node_to_process ) { |
737 parent::next_token(); |
1006 parent::next_token(); |
1007 if ( WP_HTML_Tag_Processor::STATE_TEXT_NODE === $this->parser_state ) { |
|
1008 parent::subdivide_text_appropriately(); |
|
1009 } |
|
738 } |
1010 } |
739 |
1011 |
740 // Finish stepping when there are no more tokens in the document. |
1012 // Finish stepping when there are no more tokens in the document. |
741 if ( |
1013 if ( |
742 WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state || |
1014 WP_HTML_Tag_Processor::STATE_INCOMPLETE_INPUT === $this->parser_state || |
743 WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state |
1015 WP_HTML_Tag_Processor::STATE_COMPLETE === $this->parser_state |
744 ) { |
1016 ) { |
745 return false; |
1017 return false; |
746 } |
1018 } |
747 |
1019 |
748 $this->state->current_token = new WP_HTML_Token( |
1020 $adjusted_current_node = $this->get_adjusted_current_node(); |
749 $this->bookmark_token(), |
1021 $is_closer = $this->is_tag_closer(); |
750 $this->get_token_name(), |
1022 $is_start_tag = WP_HTML_Tag_Processor::STATE_MATCHED_TAG === $this->parser_state && ! $is_closer; |
751 $this->has_self_closing_flag(), |
1023 $token_name = $this->get_token_name(); |
752 $this->release_internal_bookmark_on_destruct |
1024 |
1025 if ( self::REPROCESS_CURRENT_NODE !== $node_to_process ) { |
|
1026 $this->state->current_token = new WP_HTML_Token( |
|
1027 $this->bookmark_token(), |
|
1028 $token_name, |
|
1029 $this->has_self_closing_flag(), |
|
1030 $this->release_internal_bookmark_on_destruct |
|
1031 ); |
|
1032 } |
|
1033 |
|
1034 $parse_in_current_insertion_mode = ( |
|
1035 0 === $this->state->stack_of_open_elements->count() || |
|
1036 'html' === $adjusted_current_node->namespace || |
|
1037 ( |
|
1038 'math' === $adjusted_current_node->integration_node_type && |
|
1039 ( |
|
1040 ( $is_start_tag && ! in_array( $token_name, array( 'MGLYPH', 'MALIGNMARK' ), true ) ) || |
|
1041 '#text' === $token_name |
|
1042 ) |
|
1043 ) || |
|
1044 ( |
|
1045 'math' === $adjusted_current_node->namespace && |
|
1046 'ANNOTATION-XML' === $adjusted_current_node->node_name && |
|
1047 $is_start_tag && 'SVG' === $token_name |
|
1048 ) || |
|
1049 ( |
|
1050 'html' === $adjusted_current_node->integration_node_type && |
|
1051 ( $is_start_tag || '#text' === $token_name ) |
|
1052 ) |
|
753 ); |
1053 ); |
754 |
1054 |
755 try { |
1055 try { |
1056 if ( ! $parse_in_current_insertion_mode ) { |
|
1057 return $this->step_in_foreign_content(); |
|
1058 } |
|
1059 |
|
756 switch ( $this->state->insertion_mode ) { |
1060 switch ( $this->state->insertion_mode ) { |
1061 case WP_HTML_Processor_State::INSERTION_MODE_INITIAL: |
|
1062 return $this->step_initial(); |
|
1063 |
|
1064 case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML: |
|
1065 return $this->step_before_html(); |
|
1066 |
|
1067 case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD: |
|
1068 return $this->step_before_head(); |
|
1069 |
|
1070 case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD: |
|
1071 return $this->step_in_head(); |
|
1072 |
|
1073 case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT: |
|
1074 return $this->step_in_head_noscript(); |
|
1075 |
|
1076 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD: |
|
1077 return $this->step_after_head(); |
|
1078 |
|
757 case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: |
1079 case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY: |
758 return $this->step_in_body(); |
1080 return $this->step_in_body(); |
759 |
1081 |
1082 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: |
|
1083 return $this->step_in_table(); |
|
1084 |
|
1085 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT: |
|
1086 return $this->step_in_table_text(); |
|
1087 |
|
1088 case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: |
|
1089 return $this->step_in_caption(); |
|
1090 |
|
1091 case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP: |
|
1092 return $this->step_in_column_group(); |
|
1093 |
|
1094 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: |
|
1095 return $this->step_in_table_body(); |
|
1096 |
|
1097 case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: |
|
1098 return $this->step_in_row(); |
|
1099 |
|
1100 case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: |
|
1101 return $this->step_in_cell(); |
|
1102 |
|
1103 case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT: |
|
1104 return $this->step_in_select(); |
|
1105 |
|
1106 case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE: |
|
1107 return $this->step_in_select_in_table(); |
|
1108 |
|
1109 case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE: |
|
1110 return $this->step_in_template(); |
|
1111 |
|
1112 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY: |
|
1113 return $this->step_after_body(); |
|
1114 |
|
1115 case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET: |
|
1116 return $this->step_in_frameset(); |
|
1117 |
|
1118 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET: |
|
1119 return $this->step_after_frameset(); |
|
1120 |
|
1121 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY: |
|
1122 return $this->step_after_after_body(); |
|
1123 |
|
1124 case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET: |
|
1125 return $this->step_after_after_frameset(); |
|
1126 |
|
1127 // This should be unreachable but PHP doesn't have total type checking on switch. |
|
760 default: |
1128 default: |
761 $this->last_error = self::ERROR_UNSUPPORTED; |
1129 $this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." ); |
762 throw new WP_HTML_Unsupported_Exception( "No support for parsing in the '{$this->state->insertion_mode}' state." ); |
|
763 } |
1130 } |
764 } catch ( WP_HTML_Unsupported_Exception $e ) { |
1131 } catch ( WP_HTML_Unsupported_Exception $e ) { |
765 /* |
1132 /* |
766 * Exceptions are used in this class to escape deep call stacks that |
1133 * Exceptions are used in this class to escape deep call stacks that |
767 * otherwise might involve messier calling and return conventions. |
1134 * otherwise might involve messier calling and return conventions. |
774 * Computes the HTML breadcrumbs for the currently-matched node, if matched. |
1141 * Computes the HTML breadcrumbs for the currently-matched node, if matched. |
775 * |
1142 * |
776 * Breadcrumbs start at the outermost parent and descend toward the matched element. |
1143 * Breadcrumbs start at the outermost parent and descend toward the matched element. |
777 * They always include the entire path from the root HTML node to the matched element. |
1144 * They always include the entire path from the root HTML node to the matched element. |
778 * |
1145 * |
779 * @todo It could be more efficient to expose a generator-based version of this function |
1146 * Example: |
780 * to avoid creating the array copy on tag iteration. If this is done, it would likely |
|
781 * be more useful to walk up the stack when yielding instead of starting at the top. |
|
782 * |
|
783 * Example |
|
784 * |
1147 * |
785 * $processor = WP_HTML_Processor::create_fragment( '<p><strong><em><img></em></strong></p>' ); |
1148 * $processor = WP_HTML_Processor::create_fragment( '<p><strong><em><img></em></strong></p>' ); |
786 * $processor->next_tag( 'IMG' ); |
1149 * $processor->next_tag( 'IMG' ); |
787 * $processor->get_breadcrumbs() === array( 'HTML', 'BODY', 'P', 'STRONG', 'EM', 'IMG' ); |
1150 * $processor->get_breadcrumbs() === array( 'HTML', 'BODY', 'P', 'STRONG', 'EM', 'IMG' ); |
788 * |
1151 * |
789 * @since 6.4.0 |
1152 * @since 6.4.0 |
790 * |
1153 * |
791 * @return string[]|null Array of tag names representing path to matched node, if matched, otherwise NULL. |
1154 * @return string[] Array of tag names representing path to matched node. |
792 */ |
1155 */ |
793 public function get_breadcrumbs() { |
1156 public function get_breadcrumbs(): array { |
794 $breadcrumbs = array(); |
1157 return $this->breadcrumbs; |
795 |
|
796 foreach ( $this->state->stack_of_open_elements->walk_down() as $stack_item ) { |
|
797 $breadcrumbs[] = $stack_item->node_name; |
|
798 } |
|
799 |
|
800 if ( ! $this->is_virtual() ) { |
|
801 return $breadcrumbs; |
|
802 } |
|
803 |
|
804 foreach ( $this->element_queue as $queue_item ) { |
|
805 if ( $this->current_element->token->bookmark_name === $queue_item->token->bookmark_name ) { |
|
806 break; |
|
807 } |
|
808 |
|
809 if ( 'context-node' === $queue_item->token->bookmark_name ) { |
|
810 break; |
|
811 } |
|
812 |
|
813 if ( 'real' === $queue_item->provenance ) { |
|
814 break; |
|
815 } |
|
816 |
|
817 if ( WP_HTML_Stack_Event::PUSH === $queue_item->operation ) { |
|
818 $breadcrumbs[] = $queue_item->token->node_name; |
|
819 } else { |
|
820 array_pop( $breadcrumbs ); |
|
821 } |
|
822 } |
|
823 |
|
824 if ( null !== parent::get_token_name() && ! parent::is_tag_closer() ) { |
|
825 array_pop( $breadcrumbs ); |
|
826 } |
|
827 |
|
828 // Add the virtual node we're at. |
|
829 if ( WP_HTML_Stack_Event::PUSH === $this->current_element->operation ) { |
|
830 $breadcrumbs[] = $this->current_element->token->node_name; |
|
831 } |
|
832 |
|
833 return $breadcrumbs; |
|
834 } |
1158 } |
835 |
1159 |
836 /** |
1160 /** |
837 * Returns the nesting depth of the current location in the document. |
1161 * Returns the nesting depth of the current location in the document. |
838 * |
1162 * |
856 * |
1180 * |
857 * @since 6.6.0 |
1181 * @since 6.6.0 |
858 * |
1182 * |
859 * @return int Nesting-depth of current location in the document. |
1183 * @return int Nesting-depth of current location in the document. |
860 */ |
1184 */ |
861 public function get_current_depth() { |
1185 public function get_current_depth(): int { |
862 return $this->is_virtual() |
1186 return count( $this->breadcrumbs ); |
863 ? count( $this->get_breadcrumbs() ) |
1187 } |
864 : $this->state->stack_of_open_elements->count(); |
1188 |
865 } |
1189 /** |
866 |
1190 * Normalizes an HTML fragment by serializing it. |
867 /** |
1191 * |
868 * Parses next element in the 'in body' insertion mode. |
1192 * This method assumes that the given HTML snippet is found in BODY context. |
869 * |
1193 * For normalizing full documents or fragments found in other contexts, create |
870 * This internal function performs the 'in body' insertion mode |
1194 * a new processor using {@see WP_HTML_Processor::create_fragment} or |
1195 * {@see WP_HTML_Processor::create_full_parser} and call {@see WP_HTML_Processor::serialize} |
|
1196 * on the created instances. |
|
1197 * |
|
1198 * Many aspects of an input HTML fragment may be changed during normalization. |
|
1199 * |
|
1200 * - Attribute values will be double-quoted. |
|
1201 * - Duplicate attributes will be removed. |
|
1202 * - Omitted tags will be added. |
|
1203 * - Tag and attribute name casing will be lower-cased, |
|
1204 * except for specific SVG and MathML tags or attributes. |
|
1205 * - Text will be re-encoded, null bytes handled, |
|
1206 * and invalid UTF-8 replaced with U+FFFD. |
|
1207 * - Any incomplete syntax trailing at the end will be omitted, |
|
1208 * for example, an unclosed comment opener will be removed. |
|
1209 * |
|
1210 * Example: |
|
1211 * |
|
1212 * echo WP_HTML_Processor::normalize( '<a href=#anchor v=5 href="/" enabled>One</a another v=5><!--' ); |
|
1213 * // <a href="#anchor" v="5" enabled>One</a> |
|
1214 * |
|
1215 * echo WP_HTML_Processor::normalize( '<div></p>fun<table><td>cell</div>' ); |
|
1216 * // <div><p></p>fun<table><tbody><tr><td>cell</td></tr></tbody></table></div> |
|
1217 * |
|
1218 * echo WP_HTML_Processor::normalize( '<![CDATA[invalid comment]]> syntax < <> "oddities"' ); |
|
1219 * // <!--[CDATA[invalid comment]]--> syntax < <> "oddities" |
|
1220 * |
|
1221 * @since 6.7.0 |
|
1222 * |
|
1223 * @param string $html Input HTML to normalize. |
|
1224 * |
|
1225 * @return string|null Normalized output, or `null` if unable to normalize. |
|
1226 */ |
|
1227 public static function normalize( string $html ): ?string { |
|
1228 return static::create_fragment( $html )->serialize(); |
|
1229 } |
|
1230 |
|
1231 /** |
|
1232 * Returns normalized HTML for a fragment by serializing it. |
|
1233 * |
|
1234 * This differs from {@see WP_HTML_Processor::normalize} in that it starts with |
|
1235 * a specific HTML Processor, which _must_ not have already started scanning; |
|
1236 * it must be in the initial ready state and will be in the completed state once |
|
1237 * serialization is complete. |
|
1238 * |
|
1239 * Many aspects of an input HTML fragment may be changed during normalization. |
|
1240 * |
|
1241 * - Attribute values will be double-quoted. |
|
1242 * - Duplicate attributes will be removed. |
|
1243 * - Omitted tags will be added. |
|
1244 * - Tag and attribute name casing will be lower-cased, |
|
1245 * except for specific SVG and MathML tags or attributes. |
|
1246 * - Text will be re-encoded, null bytes handled, |
|
1247 * and invalid UTF-8 replaced with U+FFFD. |
|
1248 * - Any incomplete syntax trailing at the end will be omitted, |
|
1249 * for example, an unclosed comment opener will be removed. |
|
1250 * |
|
1251 * Example: |
|
1252 * |
|
1253 * $processor = WP_HTML_Processor::create_fragment( '<a href=#anchor v=5 href="/" enabled>One</a another v=5><!--' ); |
|
1254 * echo $processor->serialize(); |
|
1255 * // <a href="#anchor" v="5" enabled>One</a> |
|
1256 * |
|
1257 * $processor = WP_HTML_Processor::create_fragment( '<div></p>fun<table><td>cell</div>' ); |
|
1258 * echo $processor->serialize(); |
|
1259 * // <div><p></p>fun<table><tbody><tr><td>cell</td></tr></tbody></table></div> |
|
1260 * |
|
1261 * $processor = WP_HTML_Processor::create_fragment( '<![CDATA[invalid comment]]> syntax < <> "oddities"' ); |
|
1262 * echo $processor->serialize(); |
|
1263 * // <!--[CDATA[invalid comment]]--> syntax < <> "oddities" |
|
1264 * |
|
1265 * @since 6.7.0 |
|
1266 * |
|
1267 * @return string|null Normalized HTML markup represented by processor, |
|
1268 * or `null` if unable to generate serialization. |
|
1269 */ |
|
1270 public function serialize(): ?string { |
|
1271 if ( WP_HTML_Tag_Processor::STATE_READY !== $this->parser_state ) { |
|
1272 wp_trigger_error( |
|
1273 __METHOD__, |
|
1274 'An HTML Processor which has already started processing cannot serialize its contents. Serialize immediately after creating the instance.', |
|
1275 E_USER_WARNING |
|
1276 ); |
|
1277 return null; |
|
1278 } |
|
1279 |
|
1280 $html = ''; |
|
1281 while ( $this->next_token() ) { |
|
1282 $html .= $this->serialize_token(); |
|
1283 } |
|
1284 |
|
1285 if ( null !== $this->get_last_error() ) { |
|
1286 wp_trigger_error( |
|
1287 __METHOD__, |
|
1288 "Cannot serialize HTML Processor with parsing error: {$this->get_last_error()}.", |
|
1289 E_USER_WARNING |
|
1290 ); |
|
1291 return null; |
|
1292 } |
|
1293 |
|
1294 return $html; |
|
1295 } |
|
1296 |
|
1297 /** |
|
1298 * Serializes the currently-matched token. |
|
1299 * |
|
1300 * This method produces a fully-normative HTML string for the currently-matched token, |
|
1301 * if able. If not matched at any token or if the token doesn't correspond to any HTML |
|
1302 * it will return an empty string (for example, presumptuous end tags are ignored). |
|
1303 * |
|
1304 * @see static::serialize() |
|
1305 * |
|
1306 * @since 6.7.0 |
|
1307 * |
|
1308 * @return string Serialization of token, or empty string if no serialization exists. |
|
1309 */ |
|
1310 protected function serialize_token(): string { |
|
1311 $html = ''; |
|
1312 $token_type = $this->get_token_type(); |
|
1313 |
|
1314 switch ( $token_type ) { |
|
1315 case '#doctype': |
|
1316 $doctype = $this->get_doctype_info(); |
|
1317 if ( null === $doctype ) { |
|
1318 break; |
|
1319 } |
|
1320 |
|
1321 $html .= '<!DOCTYPE'; |
|
1322 |
|
1323 if ( $doctype->name ) { |
|
1324 $html .= " {$doctype->name}"; |
|
1325 } |
|
1326 |
|
1327 if ( null !== $doctype->public_identifier ) { |
|
1328 $quote = str_contains( $doctype->public_identifier, '"' ) ? "'" : '"'; |
|
1329 $html .= " PUBLIC {$quote}{$doctype->public_identifier}{$quote}"; |
|
1330 } |
|
1331 if ( null !== $doctype->system_identifier ) { |
|
1332 if ( null === $doctype->public_identifier ) { |
|
1333 $html .= ' SYSTEM'; |
|
1334 } |
|
1335 $quote = str_contains( $doctype->system_identifier, '"' ) ? "'" : '"'; |
|
1336 $html .= " {$quote}{$doctype->system_identifier}{$quote}"; |
|
1337 } |
|
1338 |
|
1339 $html .= '>'; |
|
1340 break; |
|
1341 |
|
1342 case '#text': |
|
1343 $html .= htmlspecialchars( $this->get_modifiable_text(), ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ); |
|
1344 break; |
|
1345 |
|
1346 // Unlike the `<>` which is interpreted as plaintext, this is ignored entirely. |
|
1347 case '#presumptuous-tag': |
|
1348 break; |
|
1349 |
|
1350 case '#funky-comment': |
|
1351 case '#comment': |
|
1352 $html .= "<!--{$this->get_full_comment_text()}-->"; |
|
1353 break; |
|
1354 |
|
1355 case '#cdata-section': |
|
1356 $html .= "<![CDATA[{$this->get_modifiable_text()}]]>"; |
|
1357 break; |
|
1358 } |
|
1359 |
|
1360 if ( '#tag' !== $token_type ) { |
|
1361 return $html; |
|
1362 } |
|
1363 |
|
1364 $tag_name = str_replace( "\x00", "\u{FFFD}", $this->get_tag() ); |
|
1365 $in_html = 'html' === $this->get_namespace(); |
|
1366 $qualified_name = $in_html ? strtolower( $tag_name ) : $this->get_qualified_tag_name(); |
|
1367 |
|
1368 if ( $this->is_tag_closer() ) { |
|
1369 $html .= "</{$qualified_name}>"; |
|
1370 return $html; |
|
1371 } |
|
1372 |
|
1373 $attribute_names = $this->get_attribute_names_with_prefix( '' ); |
|
1374 if ( ! isset( $attribute_names ) ) { |
|
1375 $html .= "<{$qualified_name}>"; |
|
1376 return $html; |
|
1377 } |
|
1378 |
|
1379 $html .= "<{$qualified_name}"; |
|
1380 foreach ( $attribute_names as $attribute_name ) { |
|
1381 $html .= " {$this->get_qualified_attribute_name( $attribute_name )}"; |
|
1382 $value = $this->get_attribute( $attribute_name ); |
|
1383 |
|
1384 if ( is_string( $value ) ) { |
|
1385 $html .= '="' . htmlspecialchars( $value, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5 ) . '"'; |
|
1386 } |
|
1387 |
|
1388 $html = str_replace( "\x00", "\u{FFFD}", $html ); |
|
1389 } |
|
1390 |
|
1391 if ( ! $in_html && $this->has_self_closing_flag() ) { |
|
1392 $html .= ' /'; |
|
1393 } |
|
1394 |
|
1395 $html .= '>'; |
|
1396 |
|
1397 // Flush out self-contained elements. |
|
1398 if ( $in_html && in_array( $tag_name, array( 'IFRAME', 'NOEMBED', 'NOFRAMES', 'SCRIPT', 'STYLE', 'TEXTAREA', 'TITLE', 'XMP' ), true ) ) { |
|
1399 $text = $this->get_modifiable_text(); |
|
1400 |
|
1401 switch ( $tag_name ) { |
|
1402 case 'IFRAME': |
|
1403 case 'NOEMBED': |
|
1404 case 'NOFRAMES': |
|
1405 $text = ''; |
|
1406 break; |
|
1407 |
|
1408 case 'SCRIPT': |
|
1409 case 'STYLE': |
|
1410 break; |
|
1411 |
|
1412 default: |
|
1413 $text = htmlspecialchars( $text, ENT_QUOTES | ENT_SUBSTITUTE | ENT_HTML5, 'UTF-8' ); |
|
1414 } |
|
1415 |
|
1416 $html .= "{$text}</{$qualified_name}>"; |
|
1417 } |
|
1418 |
|
1419 return $html; |
|
1420 } |
|
1421 |
|
1422 /** |
|
1423 * Parses next element in the 'initial' insertion mode. |
|
1424 * |
|
1425 * This internal function performs the 'initial' insertion mode |
|
871 * logic for the generalized WP_HTML_Processor::step() function. |
1426 * logic for the generalized WP_HTML_Processor::step() function. |
872 * |
1427 * |
873 * @since 6.4.0 |
1428 * @since 6.7.0 |
874 * |
1429 * |
875 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
1430 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
876 * |
1431 * |
877 * @see https://html.spec.whatwg.org/#parsing-main-inbody |
1432 * @see https://html.spec.whatwg.org/#the-initial-insertion-mode |
878 * @see WP_HTML_Processor::step |
1433 * @see WP_HTML_Processor::step |
879 * |
1434 * |
880 * @return bool Whether an element was found. |
1435 * @return bool Whether an element was found. |
881 */ |
1436 */ |
882 private function step_in_body() { |
1437 private function step_initial(): bool { |
883 $token_name = $this->get_token_name(); |
1438 $token_name = $this->get_token_name(); |
884 $token_type = $this->get_token_type(); |
1439 $token_type = $this->get_token_type(); |
885 $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; |
1440 $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; |
886 $op = "{$op_sigil}{$token_name}"; |
1441 $op = "{$op_sigil}{$token_name}"; |
887 |
1442 |
888 switch ( $op ) { |
1443 switch ( $op ) { |
1444 /* |
|
1445 * > A character token that is one of U+0009 CHARACTER TABULATION, |
|
1446 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), |
|
1447 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE |
|
1448 * |
|
1449 * Parse error: ignore the token. |
|
1450 */ |
|
1451 case '#text': |
|
1452 if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { |
|
1453 return $this->step(); |
|
1454 } |
|
1455 goto initial_anything_else; |
|
1456 break; |
|
1457 |
|
1458 /* |
|
1459 * > A comment token |
|
1460 */ |
|
889 case '#comment': |
1461 case '#comment': |
890 case '#funky-comment': |
1462 case '#funky-comment': |
891 case '#presumptuous-tag': |
1463 case '#presumptuous-tag': |
892 $this->insert_html_element( $this->state->current_token ); |
1464 $this->insert_html_element( $this->state->current_token ); |
893 return true; |
1465 return true; |
894 |
1466 |
1467 /* |
|
1468 * > A DOCTYPE token |
|
1469 */ |
|
1470 case 'html': |
|
1471 $doctype = $this->get_doctype_info(); |
|
1472 if ( null !== $doctype && 'quirks' === $doctype->indicated_compatability_mode ) { |
|
1473 $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; |
|
1474 } |
|
1475 |
|
1476 /* |
|
1477 * > Then, switch the insertion mode to "before html". |
|
1478 */ |
|
1479 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; |
|
1480 $this->insert_html_element( $this->state->current_token ); |
|
1481 return true; |
|
1482 } |
|
1483 |
|
1484 /* |
|
1485 * > Anything else |
|
1486 */ |
|
1487 initial_anything_else: |
|
1488 $this->compat_mode = WP_HTML_Tag_Processor::QUIRKS_MODE; |
|
1489 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML; |
|
1490 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
1491 } |
|
1492 |
|
1493 /** |
|
1494 * Parses next element in the 'before html' insertion mode. |
|
1495 * |
|
1496 * This internal function performs the 'before html' insertion mode |
|
1497 * logic for the generalized WP_HTML_Processor::step() function. |
|
1498 * |
|
1499 * @since 6.7.0 |
|
1500 * |
|
1501 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
|
1502 * |
|
1503 * @see https://html.spec.whatwg.org/#the-before-html-insertion-mode |
|
1504 * @see WP_HTML_Processor::step |
|
1505 * |
|
1506 * @return bool Whether an element was found. |
|
1507 */ |
|
1508 private function step_before_html(): bool { |
|
1509 $token_name = $this->get_token_name(); |
|
1510 $token_type = $this->get_token_type(); |
|
1511 $is_closer = parent::is_tag_closer(); |
|
1512 $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; |
|
1513 $op = "{$op_sigil}{$token_name}"; |
|
1514 |
|
1515 switch ( $op ) { |
|
1516 /* |
|
1517 * > A DOCTYPE token |
|
1518 */ |
|
1519 case 'html': |
|
1520 // Parse error: ignore the token. |
|
1521 return $this->step(); |
|
1522 |
|
1523 /* |
|
1524 * > A comment token |
|
1525 */ |
|
1526 case '#comment': |
|
1527 case '#funky-comment': |
|
1528 case '#presumptuous-tag': |
|
1529 $this->insert_html_element( $this->state->current_token ); |
|
1530 return true; |
|
1531 |
|
1532 /* |
|
1533 * > A character token that is one of U+0009 CHARACTER TABULATION, |
|
1534 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), |
|
1535 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE |
|
1536 * |
|
1537 * > Ignore the token. |
|
1538 */ |
|
895 case '#text': |
1539 case '#text': |
896 $this->reconstruct_active_formatting_elements(); |
1540 if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { |
897 |
1541 return $this->step(); |
898 $current_token = $this->bookmarks[ $this->state->current_token->bookmark_name ]; |
1542 } |
899 |
1543 goto before_html_anything_else; |
1544 break; |
|
1545 |
|
1546 /* |
|
1547 * > A start tag whose tag name is "html" |
|
1548 */ |
|
1549 case '+HTML': |
|
1550 $this->insert_html_element( $this->state->current_token ); |
|
1551 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD; |
|
1552 return true; |
|
1553 |
|
1554 /* |
|
1555 * > An end tag whose tag name is one of: "head", "body", "html", "br" |
|
1556 * |
|
1557 * Closing BR tags are always reported by the Tag Processor as opening tags. |
|
1558 */ |
|
1559 case '-HEAD': |
|
1560 case '-BODY': |
|
1561 case '-HTML': |
|
1562 /* |
|
1563 * > Act as described in the "anything else" entry below. |
|
1564 */ |
|
1565 goto before_html_anything_else; |
|
1566 break; |
|
1567 } |
|
1568 |
|
1569 /* |
|
1570 * > Any other end tag |
|
1571 */ |
|
1572 if ( $is_closer ) { |
|
1573 // Parse error: ignore the token. |
|
1574 return $this->step(); |
|
1575 } |
|
1576 |
|
1577 /* |
|
1578 * > Anything else. |
|
1579 * |
|
1580 * > Create an html element whose node document is the Document object. |
|
1581 * > Append it to the Document object. Put this element in the stack of open elements. |
|
1582 * > Switch the insertion mode to "before head", then reprocess the token. |
|
1583 */ |
|
1584 before_html_anything_else: |
|
1585 $this->insert_virtual_node( 'HTML' ); |
|
1586 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD; |
|
1587 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
1588 } |
|
1589 |
|
1590 /** |
|
1591 * Parses next element in the 'before head' insertion mode. |
|
1592 * |
|
1593 * This internal function performs the 'before head' insertion mode |
|
1594 * logic for the generalized WP_HTML_Processor::step() function. |
|
1595 * |
|
1596 * @since 6.7.0 |
|
1597 * |
|
1598 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
|
1599 * |
|
1600 * @see https://html.spec.whatwg.org/#the-before-head-insertion-mode |
|
1601 * @see WP_HTML_Processor::step |
|
1602 * |
|
1603 * @return bool Whether an element was found. |
|
1604 */ |
|
1605 private function step_before_head(): bool { |
|
1606 $token_name = $this->get_token_name(); |
|
1607 $token_type = $this->get_token_type(); |
|
1608 $is_closer = parent::is_tag_closer(); |
|
1609 $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; |
|
1610 $op = "{$op_sigil}{$token_name}"; |
|
1611 |
|
1612 switch ( $op ) { |
|
1613 /* |
|
1614 * > A character token that is one of U+0009 CHARACTER TABULATION, |
|
1615 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), |
|
1616 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE |
|
1617 * |
|
1618 * > Ignore the token. |
|
1619 */ |
|
1620 case '#text': |
|
1621 if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { |
|
1622 return $this->step(); |
|
1623 } |
|
1624 goto before_head_anything_else; |
|
1625 break; |
|
1626 |
|
1627 /* |
|
1628 * > A comment token |
|
1629 */ |
|
1630 case '#comment': |
|
1631 case '#funky-comment': |
|
1632 case '#presumptuous-tag': |
|
1633 $this->insert_html_element( $this->state->current_token ); |
|
1634 return true; |
|
1635 |
|
1636 /* |
|
1637 * > A DOCTYPE token |
|
1638 */ |
|
1639 case 'html': |
|
1640 // Parse error: ignore the token. |
|
1641 return $this->step(); |
|
1642 |
|
1643 /* |
|
1644 * > A start tag whose tag name is "html" |
|
1645 */ |
|
1646 case '+HTML': |
|
1647 return $this->step_in_body(); |
|
1648 |
|
1649 /* |
|
1650 * > A start tag whose tag name is "head" |
|
1651 */ |
|
1652 case '+HEAD': |
|
1653 $this->insert_html_element( $this->state->current_token ); |
|
1654 $this->state->head_element = $this->state->current_token; |
|
1655 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; |
|
1656 return true; |
|
1657 |
|
1658 /* |
|
1659 * > An end tag whose tag name is one of: "head", "body", "html", "br" |
|
1660 * > Act as described in the "anything else" entry below. |
|
1661 * |
|
1662 * Closing BR tags are always reported by the Tag Processor as opening tags. |
|
1663 */ |
|
1664 case '-HEAD': |
|
1665 case '-BODY': |
|
1666 case '-HTML': |
|
1667 goto before_head_anything_else; |
|
1668 break; |
|
1669 } |
|
1670 |
|
1671 if ( $is_closer ) { |
|
1672 // Parse error: ignore the token. |
|
1673 return $this->step(); |
|
1674 } |
|
1675 |
|
1676 /* |
|
1677 * > Anything else |
|
1678 * |
|
1679 * > Insert an HTML element for a "head" start tag token with no attributes. |
|
1680 */ |
|
1681 before_head_anything_else: |
|
1682 $this->state->head_element = $this->insert_virtual_node( 'HEAD' ); |
|
1683 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; |
|
1684 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
1685 } |
|
1686 |
|
1687 /** |
|
1688 * Parses next element in the 'in head' insertion mode. |
|
1689 * |
|
1690 * This internal function performs the 'in head' insertion mode |
|
1691 * logic for the generalized WP_HTML_Processor::step() function. |
|
1692 * |
|
1693 * @since 6.7.0 |
|
1694 * |
|
1695 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
|
1696 * |
|
1697 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inhead |
|
1698 * @see WP_HTML_Processor::step |
|
1699 * |
|
1700 * @return bool Whether an element was found. |
|
1701 */ |
|
1702 private function step_in_head(): bool { |
|
1703 $token_name = $this->get_token_name(); |
|
1704 $token_type = $this->get_token_type(); |
|
1705 $is_closer = parent::is_tag_closer(); |
|
1706 $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; |
|
1707 $op = "{$op_sigil}{$token_name}"; |
|
1708 |
|
1709 switch ( $op ) { |
|
1710 case '#text': |
|
1711 /* |
|
1712 * > A character token that is one of U+0009 CHARACTER TABULATION, |
|
1713 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), |
|
1714 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE |
|
1715 */ |
|
1716 if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { |
|
1717 // Insert the character. |
|
1718 $this->insert_html_element( $this->state->current_token ); |
|
1719 return true; |
|
1720 } |
|
1721 |
|
1722 goto in_head_anything_else; |
|
1723 break; |
|
1724 |
|
1725 /* |
|
1726 * > A comment token |
|
1727 */ |
|
1728 case '#comment': |
|
1729 case '#funky-comment': |
|
1730 case '#presumptuous-tag': |
|
1731 $this->insert_html_element( $this->state->current_token ); |
|
1732 return true; |
|
1733 |
|
1734 /* |
|
1735 * > A DOCTYPE token |
|
1736 */ |
|
1737 case 'html': |
|
1738 // Parse error: ignore the token. |
|
1739 return $this->step(); |
|
1740 |
|
1741 /* |
|
1742 * > A start tag whose tag name is "html" |
|
1743 */ |
|
1744 case '+HTML': |
|
1745 return $this->step_in_body(); |
|
1746 |
|
1747 /* |
|
1748 * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link" |
|
1749 */ |
|
1750 case '+BASE': |
|
1751 case '+BASEFONT': |
|
1752 case '+BGSOUND': |
|
1753 case '+LINK': |
|
1754 $this->insert_html_element( $this->state->current_token ); |
|
1755 return true; |
|
1756 |
|
1757 /* |
|
1758 * > A start tag whose tag name is "meta" |
|
1759 */ |
|
1760 case '+META': |
|
1761 $this->insert_html_element( $this->state->current_token ); |
|
1762 |
|
1763 /* |
|
1764 * > If the active speculative HTML parser is null, then: |
|
1765 * > - If the element has a charset attribute, and getting an encoding from |
|
1766 * > its value results in an encoding, and the confidence is currently |
|
1767 * > tentative, then change the encoding to the resulting encoding. |
|
1768 */ |
|
1769 $charset = $this->get_attribute( 'charset' ); |
|
1770 if ( is_string( $charset ) && 'tentative' === $this->state->encoding_confidence ) { |
|
1771 $this->bail( 'Cannot yet process META tags with charset to determine encoding.' ); |
|
1772 } |
|
1773 |
|
1774 /* |
|
1775 * > - Otherwise, if the element has an http-equiv attribute whose value is |
|
1776 * > an ASCII case-insensitive match for the string "Content-Type", and |
|
1777 * > the element has a content attribute, and applying the algorithm for |
|
1778 * > extracting a character encoding from a meta element to that attribute's |
|
1779 * > value returns an encoding, and the confidence is currently tentative, |
|
1780 * > then change the encoding to the extracted encoding. |
|
1781 */ |
|
1782 $http_equiv = $this->get_attribute( 'http-equiv' ); |
|
1783 $content = $this->get_attribute( 'content' ); |
|
1784 if ( |
|
1785 is_string( $http_equiv ) && |
|
1786 is_string( $content ) && |
|
1787 0 === strcasecmp( $http_equiv, 'Content-Type' ) && |
|
1788 'tentative' === $this->state->encoding_confidence |
|
1789 ) { |
|
1790 $this->bail( 'Cannot yet process META tags with http-equiv Content-Type to determine encoding.' ); |
|
1791 } |
|
1792 |
|
1793 return true; |
|
1794 |
|
1795 /* |
|
1796 * > A start tag whose tag name is "title" |
|
1797 */ |
|
1798 case '+TITLE': |
|
1799 $this->insert_html_element( $this->state->current_token ); |
|
1800 return true; |
|
1801 |
|
1802 /* |
|
1803 * > A start tag whose tag name is "noscript", if the scripting flag is enabled |
|
1804 * > A start tag whose tag name is one of: "noframes", "style" |
|
1805 * |
|
1806 * The scripting flag is never enabled in this parser. |
|
1807 */ |
|
1808 case '+NOFRAMES': |
|
1809 case '+STYLE': |
|
1810 $this->insert_html_element( $this->state->current_token ); |
|
1811 return true; |
|
1812 |
|
1813 /* |
|
1814 * > A start tag whose tag name is "noscript", if the scripting flag is disabled |
|
1815 */ |
|
1816 case '+NOSCRIPT': |
|
1817 $this->insert_html_element( $this->state->current_token ); |
|
1818 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT; |
|
1819 return true; |
|
1820 |
|
1821 /* |
|
1822 * > A start tag whose tag name is "script" |
|
1823 * |
|
1824 * @todo Could the adjusted insertion location be anything other than the current location? |
|
1825 */ |
|
1826 case '+SCRIPT': |
|
1827 $this->insert_html_element( $this->state->current_token ); |
|
1828 return true; |
|
1829 |
|
1830 /* |
|
1831 * > An end tag whose tag name is "head" |
|
1832 */ |
|
1833 case '-HEAD': |
|
1834 $this->state->stack_of_open_elements->pop(); |
|
1835 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD; |
|
1836 return true; |
|
1837 |
|
1838 /* |
|
1839 * > An end tag whose tag name is one of: "body", "html", "br" |
|
1840 * |
|
1841 * Closing BR tags are always reported by the Tag Processor as opening tags. |
|
1842 */ |
|
1843 case '-BODY': |
|
1844 case '-HTML': |
|
1845 /* |
|
1846 * > Act as described in the "anything else" entry below. |
|
1847 */ |
|
1848 goto in_head_anything_else; |
|
1849 break; |
|
1850 |
|
1851 /* |
|
1852 * > A start tag whose tag name is "template" |
|
1853 * |
|
1854 * @todo Could the adjusted insertion location be anything other than the current location? |
|
1855 */ |
|
1856 case '+TEMPLATE': |
|
1857 $this->state->active_formatting_elements->insert_marker(); |
|
1858 $this->state->frameset_ok = false; |
|
1859 |
|
1860 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; |
|
1861 $this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE; |
|
1862 |
|
1863 $this->insert_html_element( $this->state->current_token ); |
|
1864 return true; |
|
1865 |
|
1866 /* |
|
1867 * > An end tag whose tag name is "template" |
|
1868 */ |
|
1869 case '-TEMPLATE': |
|
1870 if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { |
|
1871 // @todo Indicate a parse error once it's possible. |
|
1872 return $this->step(); |
|
1873 } |
|
1874 |
|
1875 $this->generate_implied_end_tags_thoroughly(); |
|
1876 if ( ! $this->state->stack_of_open_elements->current_node_is( 'TEMPLATE' ) ) { |
|
1877 // @todo Indicate a parse error once it's possible. |
|
1878 } |
|
1879 |
|
1880 $this->state->stack_of_open_elements->pop_until( 'TEMPLATE' ); |
|
1881 $this->state->active_formatting_elements->clear_up_to_last_marker(); |
|
1882 array_pop( $this->state->stack_of_template_insertion_modes ); |
|
1883 $this->reset_insertion_mode_appropriately(); |
|
1884 return true; |
|
1885 } |
|
1886 |
|
1887 /* |
|
1888 * > A start tag whose tag name is "head" |
|
1889 * > Any other end tag |
|
1890 */ |
|
1891 if ( '+HEAD' === $op || $is_closer ) { |
|
1892 // Parse error: ignore the token. |
|
1893 return $this->step(); |
|
1894 } |
|
1895 |
|
1896 /* |
|
1897 * > Anything else |
|
1898 */ |
|
1899 in_head_anything_else: |
|
1900 $this->state->stack_of_open_elements->pop(); |
|
1901 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD; |
|
1902 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
1903 } |
|
1904 |
|
1905 /** |
|
1906 * Parses next element in the 'in head noscript' insertion mode. |
|
1907 * |
|
1908 * This internal function performs the 'in head noscript' insertion mode |
|
1909 * logic for the generalized WP_HTML_Processor::step() function. |
|
1910 * |
|
1911 * @since 6.7.0 |
|
1912 * |
|
1913 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
|
1914 * |
|
1915 * @see https://html.spec.whatwg.org/#parsing-main-inheadnoscript |
|
1916 * @see WP_HTML_Processor::step |
|
1917 * |
|
1918 * @return bool Whether an element was found. |
|
1919 */ |
|
1920 private function step_in_head_noscript(): bool { |
|
1921 $token_name = $this->get_token_name(); |
|
1922 $token_type = $this->get_token_type(); |
|
1923 $is_closer = parent::is_tag_closer(); |
|
1924 $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; |
|
1925 $op = "{$op_sigil}{$token_name}"; |
|
1926 |
|
1927 switch ( $op ) { |
|
1928 /* |
|
1929 * > A character token that is one of U+0009 CHARACTER TABULATION, |
|
1930 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), |
|
1931 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE |
|
1932 * |
|
1933 * > Process the token using the rules for the "in head" insertion mode. |
|
1934 */ |
|
1935 case '#text': |
|
1936 if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { |
|
1937 return $this->step_in_head(); |
|
1938 } |
|
1939 |
|
1940 goto in_head_noscript_anything_else; |
|
1941 break; |
|
1942 |
|
1943 /* |
|
1944 * > A DOCTYPE token |
|
1945 */ |
|
1946 case 'html': |
|
1947 // Parse error: ignore the token. |
|
1948 return $this->step(); |
|
1949 |
|
1950 /* |
|
1951 * > A start tag whose tag name is "html" |
|
1952 */ |
|
1953 case '+HTML': |
|
1954 return $this->step_in_body(); |
|
1955 |
|
1956 /* |
|
1957 * > An end tag whose tag name is "noscript" |
|
1958 */ |
|
1959 case '-NOSCRIPT': |
|
1960 $this->state->stack_of_open_elements->pop(); |
|
1961 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; |
|
1962 return true; |
|
1963 |
|
1964 /* |
|
1965 * > A comment token |
|
1966 * > |
|
1967 * > A start tag whose tag name is one of: "basefont", "bgsound", |
|
1968 * > "link", "meta", "noframes", "style" |
|
1969 */ |
|
1970 case '#comment': |
|
1971 case '#funky-comment': |
|
1972 case '#presumptuous-tag': |
|
1973 case '+BASEFONT': |
|
1974 case '+BGSOUND': |
|
1975 case '+LINK': |
|
1976 case '+META': |
|
1977 case '+NOFRAMES': |
|
1978 case '+STYLE': |
|
1979 return $this->step_in_head(); |
|
1980 |
|
1981 /* |
|
1982 * > An end tag whose tag name is "br" |
|
1983 * |
|
1984 * This should never happen, as the Tag Processor prevents showing a BR closing tag. |
|
1985 */ |
|
1986 } |
|
1987 |
|
1988 /* |
|
1989 * > A start tag whose tag name is one of: "head", "noscript" |
|
1990 * > Any other end tag |
|
1991 */ |
|
1992 if ( '+HEAD' === $op || '+NOSCRIPT' === $op || $is_closer ) { |
|
1993 // Parse error: ignore the token. |
|
1994 return $this->step(); |
|
1995 } |
|
1996 |
|
1997 /* |
|
1998 * > Anything else |
|
1999 * |
|
2000 * Anything here is a parse error. |
|
2001 */ |
|
2002 in_head_noscript_anything_else: |
|
2003 $this->state->stack_of_open_elements->pop(); |
|
2004 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; |
|
2005 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
2006 } |
|
2007 |
|
2008 /** |
|
2009 * Parses next element in the 'after head' insertion mode. |
|
2010 * |
|
2011 * This internal function performs the 'after head' insertion mode |
|
2012 * logic for the generalized WP_HTML_Processor::step() function. |
|
2013 * |
|
2014 * @since 6.7.0 |
|
2015 * |
|
2016 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
|
2017 * |
|
2018 * @see https://html.spec.whatwg.org/#the-after-head-insertion-mode |
|
2019 * @see WP_HTML_Processor::step |
|
2020 * |
|
2021 * @return bool Whether an element was found. |
|
2022 */ |
|
2023 private function step_after_head(): bool { |
|
2024 $token_name = $this->get_token_name(); |
|
2025 $token_type = $this->get_token_type(); |
|
2026 $is_closer = parent::is_tag_closer(); |
|
2027 $op_sigil = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : ''; |
|
2028 $op = "{$op_sigil}{$token_name}"; |
|
2029 |
|
2030 switch ( $op ) { |
|
2031 /* |
|
2032 * > A character token that is one of U+0009 CHARACTER TABULATION, |
|
2033 * > U+000A LINE FEED (LF), U+000C FORM FEED (FF), |
|
2034 * > U+000D CARRIAGE RETURN (CR), or U+0020 SPACE |
|
2035 */ |
|
2036 case '#text': |
|
2037 if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) { |
|
2038 // Insert the character. |
|
2039 $this->insert_html_element( $this->state->current_token ); |
|
2040 return true; |
|
2041 } |
|
2042 goto after_head_anything_else; |
|
2043 break; |
|
2044 |
|
2045 /* |
|
2046 * > A comment token |
|
2047 */ |
|
2048 case '#comment': |
|
2049 case '#funky-comment': |
|
2050 case '#presumptuous-tag': |
|
2051 $this->insert_html_element( $this->state->current_token ); |
|
2052 return true; |
|
2053 |
|
2054 /* |
|
2055 * > A DOCTYPE token |
|
2056 */ |
|
2057 case 'html': |
|
2058 // Parse error: ignore the token. |
|
2059 return $this->step(); |
|
2060 |
|
2061 /* |
|
2062 * > A start tag whose tag name is "html" |
|
2063 */ |
|
2064 case '+HTML': |
|
2065 return $this->step_in_body(); |
|
2066 |
|
2067 /* |
|
2068 * > A start tag whose tag name is "body" |
|
2069 */ |
|
2070 case '+BODY': |
|
2071 $this->insert_html_element( $this->state->current_token ); |
|
2072 $this->state->frameset_ok = false; |
|
2073 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; |
|
2074 return true; |
|
2075 |
|
2076 /* |
|
2077 * > A start tag whose tag name is "frameset" |
|
2078 */ |
|
2079 case '+FRAMESET': |
|
2080 $this->insert_html_element( $this->state->current_token ); |
|
2081 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; |
|
2082 return true; |
|
2083 |
|
2084 /* |
|
2085 * > A start tag whose tag name is one of: "base", "basefont", "bgsound", |
|
2086 * > "link", "meta", "noframes", "script", "style", "template", "title" |
|
2087 * |
|
2088 * Anything here is a parse error. |
|
2089 */ |
|
2090 case '+BASE': |
|
2091 case '+BASEFONT': |
|
2092 case '+BGSOUND': |
|
2093 case '+LINK': |
|
2094 case '+META': |
|
2095 case '+NOFRAMES': |
|
2096 case '+SCRIPT': |
|
2097 case '+STYLE': |
|
2098 case '+TEMPLATE': |
|
2099 case '+TITLE': |
|
2100 /* |
|
2101 * > Push the node pointed to by the head element pointer onto the stack of open elements. |
|
2102 * > Process the token using the rules for the "in head" insertion mode. |
|
2103 * > Remove the node pointed to by the head element pointer from the stack of open elements. (It might not be the current node at this point.) |
|
2104 */ |
|
2105 $this->bail( 'Cannot process elements after HEAD which reopen the HEAD element.' ); |
|
2106 /* |
|
2107 * Do not leave this break in when adding support; it's here to prevent |
|
2108 * WPCS from getting confused at the switch structure without a return, |
|
2109 * because it doesn't know that `bail()` always throws. |
|
2110 */ |
|
2111 break; |
|
2112 |
|
2113 /* |
|
2114 * > An end tag whose tag name is "template" |
|
2115 */ |
|
2116 case '-TEMPLATE': |
|
2117 return $this->step_in_head(); |
|
2118 |
|
2119 /* |
|
2120 * > An end tag whose tag name is one of: "body", "html", "br" |
|
2121 * |
|
2122 * Closing BR tags are always reported by the Tag Processor as opening tags. |
|
2123 */ |
|
2124 case '-BODY': |
|
2125 case '-HTML': |
|
2126 /* |
|
2127 * > Act as described in the "anything else" entry below. |
|
2128 */ |
|
2129 goto after_head_anything_else; |
|
2130 break; |
|
2131 } |
|
2132 |
|
2133 /* |
|
2134 * > A start tag whose tag name is "head" |
|
2135 * > Any other end tag |
|
2136 */ |
|
2137 if ( '+HEAD' === $op || $is_closer ) { |
|
2138 // Parse error: ignore the token. |
|
2139 return $this->step(); |
|
2140 } |
|
2141 |
|
2142 /* |
|
2143 * > Anything else |
|
2144 * > Insert an HTML element for a "body" start tag token with no attributes. |
|
2145 */ |
|
2146 after_head_anything_else: |
|
2147 $this->insert_virtual_node( 'BODY' ); |
|
2148 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; |
|
2149 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
2150 } |
|
2151 |
|
2152 /** |
|
2153 * Parses next element in the 'in body' insertion mode. |
|
2154 * |
|
2155 * This internal function performs the 'in body' insertion mode |
|
2156 * logic for the generalized WP_HTML_Processor::step() function. |
|
2157 * |
|
2158 * @since 6.4.0 |
|
2159 * |
|
2160 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
|
2161 * |
|
2162 * @see https://html.spec.whatwg.org/#parsing-main-inbody |
|
2163 * @see WP_HTML_Processor::step |
|
2164 * |
|
2165 * @return bool Whether an element was found. |
|
2166 */ |
|
2167 private function step_in_body(): bool { |
|
2168 $token_name = $this->get_token_name(); |
|
2169 $token_type = $this->get_token_type(); |
|
2170 $op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : ''; |
|
2171 $op = "{$op_sigil}{$token_name}"; |
|
2172 |
|
2173 switch ( $op ) { |
|
2174 case '#text': |
|
900 /* |
2175 /* |
901 * > A character token that is U+0000 NULL |
2176 * > A character token that is U+0000 NULL |
902 * |
2177 * |
903 * Any successive sequence of NULL bytes is ignored and won't |
2178 * Any successive sequence of NULL bytes is ignored and won't |
904 * trigger active format reconstruction. Therefore, if the text |
2179 * trigger active format reconstruction. Therefore, if the text |
905 * only comprises NULL bytes then the token should be ignored |
2180 * only comprises NULL bytes then the token should be ignored |
906 * here, but if there are any other characters in the stream |
2181 * here, but if there are any other characters in the stream |
907 * the active formats should be reconstructed. |
2182 * the active formats should be reconstructed. |
908 */ |
2183 */ |
909 if ( |
2184 if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) { |
910 1 <= $current_token->length && |
|
911 "\x00" === $this->html[ $current_token->start ] && |
|
912 strspn( $this->html, "\x00", $current_token->start, $current_token->length ) === $current_token->length |
|
913 ) { |
|
914 // Parse error: ignore the token. |
2185 // Parse error: ignore the token. |
915 return $this->step(); |
2186 return $this->step(); |
916 } |
2187 } |
2188 |
|
2189 $this->reconstruct_active_formatting_elements(); |
|
917 |
2190 |
918 /* |
2191 /* |
919 * Whitespace-only text does not affect the frameset-ok flag. |
2192 * Whitespace-only text does not affect the frameset-ok flag. |
920 * It is probably inter-element whitespace, but it may also |
2193 * It is probably inter-element whitespace, but it may also |
921 * contain character references which decode only to whitespace. |
2194 * contain character references which decode only to whitespace. |
922 */ |
2195 */ |
923 $text = $this->get_modifiable_text(); |
2196 if ( parent::TEXT_IS_GENERIC === $this->text_node_classification ) { |
924 if ( strlen( $text ) !== strspn( $text, " \t\n\f\r" ) ) { |
|
925 $this->state->frameset_ok = false; |
2197 $this->state->frameset_ok = false; |
926 } |
2198 } |
927 |
2199 |
928 $this->insert_html_element( $this->state->current_token ); |
2200 $this->insert_html_element( $this->state->current_token ); |
929 return true; |
2201 return true; |
930 |
2202 |
2203 case '#comment': |
|
2204 case '#funky-comment': |
|
2205 case '#presumptuous-tag': |
|
2206 $this->insert_html_element( $this->state->current_token ); |
|
2207 return true; |
|
2208 |
|
2209 /* |
|
2210 * > A DOCTYPE token |
|
2211 * > Parse error. Ignore the token. |
|
2212 */ |
|
931 case 'html': |
2213 case 'html': |
2214 return $this->step(); |
|
2215 |
|
2216 /* |
|
2217 * > A start tag whose tag name is "html" |
|
2218 */ |
|
2219 case '+HTML': |
|
2220 if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { |
|
2221 /* |
|
2222 * > Otherwise, for each attribute on the token, check to see if the attribute |
|
2223 * > is already present on the top element of the stack of open elements. If |
|
2224 * > it is not, add the attribute and its corresponding value to that element. |
|
2225 * |
|
2226 * This parser does not currently support this behavior: ignore the token. |
|
2227 */ |
|
2228 } |
|
2229 |
|
2230 // Ignore the token. |
|
2231 return $this->step(); |
|
2232 |
|
2233 /* |
|
2234 * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link", |
|
2235 * > "meta", "noframes", "script", "style", "template", "title" |
|
2236 * > |
|
2237 * > An end tag whose tag name is "template" |
|
2238 */ |
|
2239 case '+BASE': |
|
2240 case '+BASEFONT': |
|
2241 case '+BGSOUND': |
|
2242 case '+LINK': |
|
2243 case '+META': |
|
2244 case '+NOFRAMES': |
|
2245 case '+SCRIPT': |
|
2246 case '+STYLE': |
|
2247 case '+TEMPLATE': |
|
2248 case '+TITLE': |
|
2249 case '-TEMPLATE': |
|
2250 return $this->step_in_head(); |
|
2251 |
|
2252 /* |
|
2253 * > A start tag whose tag name is "body" |
|
2254 * |
|
2255 * This tag in the IN BODY insertion mode is a parse error. |
|
2256 */ |
|
2257 case '+BODY': |
|
2258 if ( |
|
2259 1 === $this->state->stack_of_open_elements->count() || |
|
2260 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || |
|
2261 $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) |
|
2262 ) { |
|
2263 // Ignore the token. |
|
2264 return $this->step(); |
|
2265 } |
|
2266 |
|
932 /* |
2267 /* |
933 * > A DOCTYPE token |
2268 * > Otherwise, set the frameset-ok flag to "not ok"; then, for each attribute |
934 * > Parse error. Ignore the token. |
2269 * > on the token, check to see if the attribute is already present on the body |
2270 * > element (the second element) on the stack of open elements, and if it is |
|
2271 * > not, add the attribute and its corresponding value to that element. |
|
2272 * |
|
2273 * This parser does not currently support this behavior: ignore the token. |
|
2274 */ |
|
2275 $this->state->frameset_ok = false; |
|
2276 return $this->step(); |
|
2277 |
|
2278 /* |
|
2279 * > A start tag whose tag name is "frameset" |
|
2280 * |
|
2281 * This tag in the IN BODY insertion mode is a parse error. |
|
2282 */ |
|
2283 case '+FRAMESET': |
|
2284 if ( |
|
2285 1 === $this->state->stack_of_open_elements->count() || |
|
2286 'BODY' !== ( $this->state->stack_of_open_elements->at( 2 )->node_name ?? null ) || |
|
2287 false === $this->state->frameset_ok |
|
2288 ) { |
|
2289 // Ignore the token. |
|
2290 return $this->step(); |
|
2291 } |
|
2292 |
|
2293 /* |
|
2294 * > Otherwise, run the following steps: |
|
2295 */ |
|
2296 $this->bail( 'Cannot process non-ignored FRAMESET tags.' ); |
|
2297 break; |
|
2298 |
|
2299 /* |
|
2300 * > An end tag whose tag name is "body" |
|
2301 */ |
|
2302 case '-BODY': |
|
2303 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) { |
|
2304 // Parse error: ignore the token. |
|
2305 return $this->step(); |
|
2306 } |
|
2307 |
|
2308 /* |
|
2309 * > Otherwise, if there is a node in the stack of open elements that is not either a |
|
2310 * > dd element, a dt element, an li element, an optgroup element, an option element, |
|
2311 * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody |
|
2312 * > element, a td element, a tfoot element, a th element, a thead element, a tr |
|
2313 * > element, the body element, or the html element, then this is a parse error. |
|
2314 * |
|
2315 * There is nothing to do for this parse error, so don't check for it. |
|
2316 */ |
|
2317 |
|
2318 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY; |
|
2319 /* |
|
2320 * The BODY element is not removed from the stack of open elements. |
|
2321 * Only internal state has changed, this does not qualify as a "step" |
|
2322 * in terms of advancing through the document to another token. |
|
2323 * Nothing has been pushed or popped. |
|
2324 * Proceed to parse the next item. |
|
935 */ |
2325 */ |
936 return $this->step(); |
2326 return $this->step(); |
937 |
2327 |
938 /* |
2328 /* |
939 * > A start tag whose tag name is "button" |
2329 * > An end tag whose tag name is "html" |
940 */ |
2330 */ |
941 case '+BUTTON': |
2331 case '-HTML': |
942 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) { |
2332 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'BODY' ) ) { |
943 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
2333 // Parse error: ignore the token. |
944 $this->generate_implied_end_tags(); |
2334 return $this->step(); |
945 $this->state->stack_of_open_elements->pop_until( 'BUTTON' ); |
2335 } |
946 } |
2336 |
947 |
2337 /* |
948 $this->reconstruct_active_formatting_elements(); |
2338 * > Otherwise, if there is a node in the stack of open elements that is not either a |
949 $this->insert_html_element( $this->state->current_token ); |
2339 * > dd element, a dt element, an li element, an optgroup element, an option element, |
950 $this->state->frameset_ok = false; |
2340 * > a p element, an rb element, an rp element, an rt element, an rtc element, a tbody |
951 |
2341 * > element, a td element, a tfoot element, a th element, a thread element, a tr |
952 return true; |
2342 * > element, the body element, or the html element, then this is a parse error. |
2343 * |
|
2344 * There is nothing to do for this parse error, so don't check for it. |
|
2345 */ |
|
2346 |
|
2347 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY; |
|
2348 return $this->step( self::REPROCESS_CURRENT_NODE ); |
|
953 |
2349 |
954 /* |
2350 /* |
955 * > A start tag whose tag name is one of: "address", "article", "aside", |
2351 * > A start tag whose tag name is one of: "address", "article", "aside", |
956 * > "blockquote", "center", "details", "dialog", "dir", "div", "dl", |
2352 * > "blockquote", "center", "details", "dialog", "dir", "div", "dl", |
957 * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup", |
2353 * > "fieldset", "figcaption", "figure", "footer", "header", "hgroup", |
985 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
2381 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
986 $this->close_a_p_element(); |
2382 $this->close_a_p_element(); |
987 } |
2383 } |
988 |
2384 |
989 $this->insert_html_element( $this->state->current_token ); |
2385 $this->insert_html_element( $this->state->current_token ); |
2386 return true; |
|
2387 |
|
2388 /* |
|
2389 * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" |
|
2390 */ |
|
2391 case '+H1': |
|
2392 case '+H2': |
|
2393 case '+H3': |
|
2394 case '+H4': |
|
2395 case '+H5': |
|
2396 case '+H6': |
|
2397 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
2398 $this->close_a_p_element(); |
|
2399 } |
|
2400 |
|
2401 if ( |
|
2402 in_array( |
|
2403 $this->state->stack_of_open_elements->current_node()->node_name, |
|
2404 array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), |
|
2405 true |
|
2406 ) |
|
2407 ) { |
|
2408 // @todo Indicate a parse error once it's possible. |
|
2409 $this->state->stack_of_open_elements->pop(); |
|
2410 } |
|
2411 |
|
2412 $this->insert_html_element( $this->state->current_token ); |
|
2413 return true; |
|
2414 |
|
2415 /* |
|
2416 * > A start tag whose tag name is one of: "pre", "listing" |
|
2417 */ |
|
2418 case '+PRE': |
|
2419 case '+LISTING': |
|
2420 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
2421 $this->close_a_p_element(); |
|
2422 } |
|
2423 |
|
2424 /* |
|
2425 * > If the next token is a U+000A LINE FEED (LF) character token, |
|
2426 * > then ignore that token and move on to the next one. (Newlines |
|
2427 * > at the start of pre blocks are ignored as an authoring convenience.) |
|
2428 * |
|
2429 * This is handled in `get_modifiable_text()`. |
|
2430 */ |
|
2431 |
|
2432 $this->insert_html_element( $this->state->current_token ); |
|
2433 $this->state->frameset_ok = false; |
|
2434 return true; |
|
2435 |
|
2436 /* |
|
2437 * > A start tag whose tag name is "form" |
|
2438 */ |
|
2439 case '+FORM': |
|
2440 $stack_contains_template = $this->state->stack_of_open_elements->contains( 'TEMPLATE' ); |
|
2441 |
|
2442 if ( isset( $this->state->form_element ) && ! $stack_contains_template ) { |
|
2443 // Parse error: ignore the token. |
|
2444 return $this->step(); |
|
2445 } |
|
2446 |
|
2447 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
2448 $this->close_a_p_element(); |
|
2449 } |
|
2450 |
|
2451 $this->insert_html_element( $this->state->current_token ); |
|
2452 if ( ! $stack_contains_template ) { |
|
2453 $this->state->form_element = $this->state->current_token; |
|
2454 } |
|
2455 |
|
2456 return true; |
|
2457 |
|
2458 /* |
|
2459 * > A start tag whose tag name is "li" |
|
2460 * > A start tag whose tag name is one of: "dd", "dt" |
|
2461 */ |
|
2462 case '+DD': |
|
2463 case '+DT': |
|
2464 case '+LI': |
|
2465 $this->state->frameset_ok = false; |
|
2466 $node = $this->state->stack_of_open_elements->current_node(); |
|
2467 $is_li = 'LI' === $token_name; |
|
2468 |
|
2469 in_body_list_loop: |
|
2470 /* |
|
2471 * The logic for LI and DT/DD is the same except for one point: LI elements _only_ |
|
2472 * close other LI elements, but a DT or DD element closes _any_ open DT or DD element. |
|
2473 */ |
|
2474 if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) { |
|
2475 $node_name = $is_li ? 'LI' : $node->node_name; |
|
2476 $this->generate_implied_end_tags( $node_name ); |
|
2477 if ( ! $this->state->stack_of_open_elements->current_node_is( $node_name ) ) { |
|
2478 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
|
2479 } |
|
2480 |
|
2481 $this->state->stack_of_open_elements->pop_until( $node_name ); |
|
2482 goto in_body_list_done; |
|
2483 } |
|
2484 |
|
2485 if ( |
|
2486 'ADDRESS' !== $node->node_name && |
|
2487 'DIV' !== $node->node_name && |
|
2488 'P' !== $node->node_name && |
|
2489 self::is_special( $node ) |
|
2490 ) { |
|
2491 /* |
|
2492 * > If node is in the special category, but is not an address, div, |
|
2493 * > or p element, then jump to the step labeled done below. |
|
2494 */ |
|
2495 goto in_body_list_done; |
|
2496 } else { |
|
2497 /* |
|
2498 * > Otherwise, set node to the previous entry in the stack of open elements |
|
2499 * > and return to the step labeled loop. |
|
2500 */ |
|
2501 foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { |
|
2502 $node = $item; |
|
2503 break; |
|
2504 } |
|
2505 goto in_body_list_loop; |
|
2506 } |
|
2507 |
|
2508 in_body_list_done: |
|
2509 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
2510 $this->close_a_p_element(); |
|
2511 } |
|
2512 |
|
2513 $this->insert_html_element( $this->state->current_token ); |
|
2514 return true; |
|
2515 |
|
2516 case '+PLAINTEXT': |
|
2517 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
2518 $this->close_a_p_element(); |
|
2519 } |
|
2520 |
|
2521 /* |
|
2522 * @todo This may need to be handled in the Tag Processor and turn into |
|
2523 * a single self-contained tag like TEXTAREA, whose modifiable text |
|
2524 * is the rest of the input document as plaintext. |
|
2525 */ |
|
2526 $this->bail( 'Cannot process PLAINTEXT elements.' ); |
|
2527 break; |
|
2528 |
|
2529 /* |
|
2530 * > A start tag whose tag name is "button" |
|
2531 */ |
|
2532 case '+BUTTON': |
|
2533 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'BUTTON' ) ) { |
|
2534 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
|
2535 $this->generate_implied_end_tags(); |
|
2536 $this->state->stack_of_open_elements->pop_until( 'BUTTON' ); |
|
2537 } |
|
2538 |
|
2539 $this->reconstruct_active_formatting_elements(); |
|
2540 $this->insert_html_element( $this->state->current_token ); |
|
2541 $this->state->frameset_ok = false; |
|
2542 |
|
990 return true; |
2543 return true; |
991 |
2544 |
992 /* |
2545 /* |
993 * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote", |
2546 * > An end tag whose tag name is one of: "address", "article", "aside", "blockquote", |
994 * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", |
2547 * > "button", "center", "details", "dialog", "dir", "div", "dl", "fieldset", |
1027 // Ignore the token. |
2580 // Ignore the token. |
1028 return $this->step(); |
2581 return $this->step(); |
1029 } |
2582 } |
1030 |
2583 |
1031 $this->generate_implied_end_tags(); |
2584 $this->generate_implied_end_tags(); |
1032 if ( $this->state->stack_of_open_elements->current_node()->node_name !== $token_name ) { |
2585 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { |
1033 // @todo Record parse error: this error doesn't impact parsing. |
2586 // @todo Record parse error: this error doesn't impact parsing. |
1034 } |
2587 } |
1035 $this->state->stack_of_open_elements->pop_until( $token_name ); |
2588 $this->state->stack_of_open_elements->pop_until( $token_name ); |
1036 return true; |
2589 return true; |
1037 |
2590 |
1038 /* |
2591 /* |
1039 * > A start tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" |
2592 * > An end tag whose tag name is "form" |
1040 */ |
2593 */ |
1041 case '+H1': |
2594 case '-FORM': |
1042 case '+H2': |
2595 if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) { |
1043 case '+H3': |
2596 $node = $this->state->form_element; |
1044 case '+H4': |
2597 $this->state->form_element = null; |
1045 case '+H5': |
2598 |
1046 case '+H6': |
|
1047 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
1048 $this->close_a_p_element(); |
|
1049 } |
|
1050 |
|
1051 if ( |
|
1052 in_array( |
|
1053 $this->state->stack_of_open_elements->current_node()->node_name, |
|
1054 array( 'H1', 'H2', 'H3', 'H4', 'H5', 'H6' ), |
|
1055 true |
|
1056 ) |
|
1057 ) { |
|
1058 // @todo Indicate a parse error once it's possible. |
|
1059 $this->state->stack_of_open_elements->pop(); |
|
1060 } |
|
1061 |
|
1062 $this->insert_html_element( $this->state->current_token ); |
|
1063 return true; |
|
1064 |
|
1065 /* |
|
1066 * > A start tag whose tag name is one of: "pre", "listing" |
|
1067 */ |
|
1068 case '+PRE': |
|
1069 case '+LISTING': |
|
1070 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
1071 $this->close_a_p_element(); |
|
1072 } |
|
1073 $this->insert_html_element( $this->state->current_token ); |
|
1074 $this->state->frameset_ok = false; |
|
1075 return true; |
|
1076 |
|
1077 /* |
|
1078 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" |
|
1079 */ |
|
1080 case '-H1': |
|
1081 case '-H2': |
|
1082 case '-H3': |
|
1083 case '-H4': |
|
1084 case '-H5': |
|
1085 case '-H6': |
|
1086 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { |
|
1087 /* |
2599 /* |
1088 * This is a parse error; ignore the token. |
2600 * > If node is null or if the stack of open elements does not have node |
2601 * > in scope, then this is a parse error; return and ignore the token. |
|
1089 * |
2602 * |
1090 * @todo Indicate a parse error once it's possible. |
2603 * @todo It's necessary to check if the form token itself is in scope, not |
2604 * simply whether any FORM is in scope. |
|
1091 */ |
2605 */ |
1092 return $this->step(); |
2606 if ( |
1093 } |
2607 null === $node || |
1094 |
2608 ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) |
1095 $this->generate_implied_end_tags(); |
2609 ) { |
1096 |
2610 // Parse error: ignore the token. |
1097 if ( $this->state->stack_of_open_elements->current_node()->node_name !== $token_name ) { |
2611 return $this->step(); |
1098 // @todo Record parse error: this error doesn't impact parsing. |
2612 } |
1099 } |
2613 |
1100 |
2614 $this->generate_implied_end_tags(); |
1101 $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); |
2615 if ( $node !== $this->state->stack_of_open_elements->current_node() ) { |
1102 return true; |
2616 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
1103 |
2617 $this->bail( 'Cannot close a FORM when other elements remain open as this would throw off the breadcrumbs for the following tokens.' ); |
1104 /* |
2618 } |
1105 * > A start tag whose tag name is "li" |
2619 |
1106 * > A start tag whose tag name is one of: "dd", "dt" |
2620 $this->state->stack_of_open_elements->remove_node( $node ); |
1107 */ |
2621 return true; |
1108 case '+DD': |
2622 } else { |
1109 case '+DT': |
2623 /* |
1110 case '+LI': |
2624 * > If the stack of open elements does not have a form element in scope, |
1111 $this->state->frameset_ok = false; |
2625 * > then this is a parse error; return and ignore the token. |
1112 $node = $this->state->stack_of_open_elements->current_node(); |
2626 * |
1113 $is_li = 'LI' === $token_name; |
2627 * Note that unlike in the clause above, this is checking for any FORM in scope. |
1114 |
2628 */ |
1115 in_body_list_loop: |
2629 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( 'FORM' ) ) { |
1116 /* |
2630 // Parse error: ignore the token. |
1117 * The logic for LI and DT/DD is the same except for one point: LI elements _only_ |
2631 return $this->step(); |
1118 * close other LI elements, but a DT or DD element closes _any_ open DT or DD element. |
2632 } |
1119 */ |
2633 |
1120 if ( $is_li ? 'LI' === $node->node_name : ( 'DD' === $node->node_name || 'DT' === $node->node_name ) ) { |
2634 $this->generate_implied_end_tags(); |
1121 $node_name = $is_li ? 'LI' : $node->node_name; |
2635 |
1122 $this->generate_implied_end_tags( $node_name ); |
2636 if ( ! $this->state->stack_of_open_elements->current_node_is( 'FORM' ) ) { |
1123 if ( $node_name !== $this->state->stack_of_open_elements->current_node()->node_name ) { |
|
1124 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
2637 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
1125 } |
2638 } |
1126 |
2639 |
1127 $this->state->stack_of_open_elements->pop_until( $node_name ); |
2640 $this->state->stack_of_open_elements->pop_until( 'FORM' ); |
1128 goto in_body_list_done; |
2641 return true; |
1129 } |
2642 } |
1130 |
2643 break; |
1131 if ( |
2644 |
1132 'ADDRESS' !== $node->node_name && |
2645 /* |
1133 'DIV' !== $node->node_name && |
2646 * > An end tag whose tag name is "p" |
1134 'P' !== $node->node_name && |
2647 */ |
1135 $this->is_special( $node->node_name ) |
2648 case '-P': |
1136 ) { |
2649 if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
1137 /* |
2650 $this->insert_html_element( $this->state->current_token ); |
1138 * > If node is in the special category, but is not an address, div, |
2651 } |
1139 * > or p element, then jump to the step labeled done below. |
2652 |
1140 */ |
2653 $this->close_a_p_element(); |
1141 goto in_body_list_done; |
|
1142 } else { |
|
1143 /* |
|
1144 * > Otherwise, set node to the previous entry in the stack of open elements |
|
1145 * > and return to the step labeled loop. |
|
1146 */ |
|
1147 foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) { |
|
1148 $node = $item; |
|
1149 break; |
|
1150 } |
|
1151 goto in_body_list_loop; |
|
1152 } |
|
1153 |
|
1154 in_body_list_done: |
|
1155 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
1156 $this->close_a_p_element(); |
|
1157 } |
|
1158 |
|
1159 $this->insert_html_element( $this->state->current_token ); |
|
1160 return true; |
2654 return true; |
1161 |
2655 |
1162 /* |
2656 /* |
1163 * > An end tag whose tag name is "li" |
2657 * > An end tag whose tag name is "li" |
1164 * > An end tag whose tag name is one of: "dd", "dt" |
2658 * > An end tag whose tag name is one of: "dd", "dt" |
1195 return $this->step(); |
2689 return $this->step(); |
1196 } |
2690 } |
1197 |
2691 |
1198 $this->generate_implied_end_tags( $token_name ); |
2692 $this->generate_implied_end_tags( $token_name ); |
1199 |
2693 |
1200 if ( $token_name !== $this->state->stack_of_open_elements->current_node()->node_name ) { |
2694 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { |
1201 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
2695 // @todo Indicate a parse error once it's possible. This error does not impact the logic here. |
1202 } |
2696 } |
1203 |
2697 |
1204 $this->state->stack_of_open_elements->pop_until( $token_name ); |
2698 $this->state->stack_of_open_elements->pop_until( $token_name ); |
1205 return true; |
2699 return true; |
1206 |
2700 |
1207 /* |
2701 /* |
1208 * > An end tag whose tag name is "p" |
2702 * > An end tag whose tag name is one of: "h1", "h2", "h3", "h4", "h5", "h6" |
1209 */ |
2703 */ |
1210 case '-P': |
2704 case '-H1': |
1211 if ( ! $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
2705 case '-H2': |
1212 $this->insert_html_element( $this->state->current_token ); |
2706 case '-H3': |
1213 } |
2707 case '-H4': |
1214 |
2708 case '-H5': |
1215 $this->close_a_p_element(); |
2709 case '-H6': |
1216 return true; |
2710 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( '(internal: H1 through H6 - do not use)' ) ) { |
1217 |
2711 /* |
1218 // > A start tag whose tag name is "a" |
2712 * This is a parse error; ignore the token. |
2713 * |
|
2714 * @todo Indicate a parse error once it's possible. |
|
2715 */ |
|
2716 return $this->step(); |
|
2717 } |
|
2718 |
|
2719 $this->generate_implied_end_tags(); |
|
2720 |
|
2721 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { |
|
2722 // @todo Record parse error: this error doesn't impact parsing. |
|
2723 } |
|
2724 |
|
2725 $this->state->stack_of_open_elements->pop_until( '(internal: H1 through H6 - do not use)' ); |
|
2726 return true; |
|
2727 |
|
2728 /* |
|
2729 * > A start tag whose tag name is "a" |
|
2730 */ |
|
1219 case '+A': |
2731 case '+A': |
1220 foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { |
2732 foreach ( $this->state->active_formatting_elements->walk_up() as $item ) { |
1221 switch ( $item->node_name ) { |
2733 switch ( $item->node_name ) { |
1222 case 'marker': |
2734 case 'marker': |
1223 break; |
2735 break 2; |
1224 |
2736 |
1225 case 'A': |
2737 case 'A': |
1226 $this->run_adoption_agency_algorithm(); |
2738 $this->run_adoption_agency_algorithm(); |
1227 $this->state->active_formatting_elements->remove_node( $item ); |
2739 $this->state->active_formatting_elements->remove_node( $item ); |
1228 $this->state->stack_of_open_elements->remove_node( $item ); |
2740 $this->state->stack_of_open_elements->remove_node( $item ); |
1229 break; |
2741 break 2; |
1230 } |
2742 } |
1231 } |
2743 } |
1232 |
2744 |
1233 $this->reconstruct_active_formatting_elements(); |
2745 $this->reconstruct_active_formatting_elements(); |
1234 $this->insert_html_element( $this->state->current_token ); |
2746 $this->insert_html_element( $this->state->current_token ); |
1255 $this->insert_html_element( $this->state->current_token ); |
2767 $this->insert_html_element( $this->state->current_token ); |
1256 $this->state->active_formatting_elements->push( $this->state->current_token ); |
2768 $this->state->active_formatting_elements->push( $this->state->current_token ); |
1257 return true; |
2769 return true; |
1258 |
2770 |
1259 /* |
2771 /* |
2772 * > A start tag whose tag name is "nobr" |
|
2773 */ |
|
2774 case '+NOBR': |
|
2775 $this->reconstruct_active_formatting_elements(); |
|
2776 |
|
2777 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'NOBR' ) ) { |
|
2778 // Parse error. |
|
2779 $this->run_adoption_agency_algorithm(); |
|
2780 $this->reconstruct_active_formatting_elements(); |
|
2781 } |
|
2782 |
|
2783 $this->insert_html_element( $this->state->current_token ); |
|
2784 $this->state->active_formatting_elements->push( $this->state->current_token ); |
|
2785 return true; |
|
2786 |
|
2787 /* |
|
1260 * > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i", |
2788 * > An end tag whose tag name is one of: "a", "b", "big", "code", "em", "font", "i", |
1261 * > "nobr", "s", "small", "strike", "strong", "tt", "u" |
2789 * > "nobr", "s", "small", "strike", "strong", "tt", "u" |
1262 */ |
2790 */ |
1263 case '-A': |
2791 case '-A': |
1264 case '-B': |
2792 case '-B': |
1265 case '-BIG': |
2793 case '-BIG': |
1266 case '-CODE': |
2794 case '-CODE': |
1267 case '-EM': |
2795 case '-EM': |
1268 case '-FONT': |
2796 case '-FONT': |
1269 case '-I': |
2797 case '-I': |
2798 case '-NOBR': |
|
1270 case '-S': |
2799 case '-S': |
1271 case '-SMALL': |
2800 case '-SMALL': |
1272 case '-STRIKE': |
2801 case '-STRIKE': |
1273 case '-STRONG': |
2802 case '-STRONG': |
1274 case '-TT': |
2803 case '-TT': |
1275 case '-U': |
2804 case '-U': |
1276 $this->run_adoption_agency_algorithm(); |
2805 $this->run_adoption_agency_algorithm(); |
1277 return true; |
2806 return true; |
1278 |
2807 |
1279 /* |
2808 /* |
2809 * > A start tag whose tag name is one of: "applet", "marquee", "object" |
|
2810 */ |
|
2811 case '+APPLET': |
|
2812 case '+MARQUEE': |
|
2813 case '+OBJECT': |
|
2814 $this->reconstruct_active_formatting_elements(); |
|
2815 $this->insert_html_element( $this->state->current_token ); |
|
2816 $this->state->active_formatting_elements->insert_marker(); |
|
2817 $this->state->frameset_ok = false; |
|
2818 return true; |
|
2819 |
|
2820 /* |
|
2821 * > An end tag token whose tag name is one of: "applet", "marquee", "object" |
|
2822 */ |
|
2823 case '-APPLET': |
|
2824 case '-MARQUEE': |
|
2825 case '-OBJECT': |
|
2826 if ( ! $this->state->stack_of_open_elements->has_element_in_scope( $token_name ) ) { |
|
2827 // Parse error: ignore the token. |
|
2828 return $this->step(); |
|
2829 } |
|
2830 |
|
2831 $this->generate_implied_end_tags(); |
|
2832 if ( ! $this->state->stack_of_open_elements->current_node_is( $token_name ) ) { |
|
2833 // This is a parse error. |
|
2834 } |
|
2835 |
|
2836 $this->state->stack_of_open_elements->pop_until( $token_name ); |
|
2837 $this->state->active_formatting_elements->clear_up_to_last_marker(); |
|
2838 return true; |
|
2839 |
|
2840 /* |
|
2841 * > A start tag whose tag name is "table" |
|
2842 */ |
|
2843 case '+TABLE': |
|
2844 /* |
|
2845 * > If the Document is not set to quirks mode, and the stack of open elements |
|
2846 * > has a p element in button scope, then close a p element. |
|
2847 */ |
|
2848 if ( |
|
2849 WP_HTML_Tag_Processor::QUIRKS_MODE !== $this->compat_mode && |
|
2850 $this->state->stack_of_open_elements->has_p_in_button_scope() |
|
2851 ) { |
|
2852 $this->close_a_p_element(); |
|
2853 } |
|
2854 |
|
2855 $this->insert_html_element( $this->state->current_token ); |
|
2856 $this->state->frameset_ok = false; |
|
2857 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; |
|
2858 return true; |
|
2859 |
|
2860 /* |
|
1280 * > An end tag whose tag name is "br" |
2861 * > An end tag whose tag name is "br" |
1281 * > Parse error. Drop the attributes from the token, and act as described in the next |
2862 * |
1282 * > entry; i.e. act as if this was a "br" start tag token with no attributes, rather |
2863 * This is prevented from happening because the Tag Processor |
1283 * > than the end tag token that it actually is. |
2864 * reports all closing BR tags as if they were opening tags. |
1284 */ |
2865 */ |
1285 case '-BR': |
|
1286 $this->last_error = self::ERROR_UNSUPPORTED; |
|
1287 throw new WP_HTML_Unsupported_Exception( 'Closing BR tags require unimplemented special handling.' ); |
|
1288 |
2866 |
1289 /* |
2867 /* |
1290 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr" |
2868 * > A start tag whose tag name is one of: "area", "br", "embed", "img", "keygen", "wbr" |
1291 */ |
2869 */ |
1292 case '+AREA': |
2870 case '+AREA': |
1304 * > A start tag whose tag name is "input" |
2882 * > A start tag whose tag name is "input" |
1305 */ |
2883 */ |
1306 case '+INPUT': |
2884 case '+INPUT': |
1307 $this->reconstruct_active_formatting_elements(); |
2885 $this->reconstruct_active_formatting_elements(); |
1308 $this->insert_html_element( $this->state->current_token ); |
2886 $this->insert_html_element( $this->state->current_token ); |
1309 $type_attribute = $this->get_attribute( 'type' ); |
2887 |
1310 /* |
2888 /* |
1311 * > If the token does not have an attribute with the name "type", or if it does, |
2889 * > If the token does not have an attribute with the name "type", or if it does, |
1312 * > but that attribute's value is not an ASCII case-insensitive match for the |
2890 * > but that attribute's value is not an ASCII case-insensitive match for the |
1313 * > string "hidden", then: set the frameset-ok flag to "not ok". |
2891 * > string "hidden", then: set the frameset-ok flag to "not ok". |
1314 */ |
2892 */ |
2893 $type_attribute = $this->get_attribute( 'type' ); |
|
1315 if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { |
2894 if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) { |
1316 $this->state->frameset_ok = false; |
2895 $this->state->frameset_ok = false; |
1317 } |
2896 } |
1318 return true; |
2897 |
1319 |
|
1320 /* |
|
1321 * > A start tag whose tag name is "hr" |
|
1322 */ |
|
1323 case '+HR': |
|
1324 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
|
1325 $this->close_a_p_element(); |
|
1326 } |
|
1327 $this->insert_html_element( $this->state->current_token ); |
|
1328 $this->state->frameset_ok = false; |
|
1329 return true; |
2898 return true; |
1330 |
2899 |
1331 /* |
2900 /* |
1332 * > A start tag whose tag name is one of: "param", "source", "track" |
2901 * > A start tag whose tag name is one of: "param", "source", "track" |
1333 */ |
2902 */ |
1334 case '+PARAM': |
2903 case '+PARAM': |
1335 case '+SOURCE': |
2904 case '+SOURCE': |
1336 case '+TRACK': |
2905 case '+TRACK': |
1337 $this->insert_html_element( $this->state->current_token ); |
2906 $this->insert_html_element( $this->state->current_token ); |
1338 return true; |
2907 return true; |
1339 } |
2908 |
1340 |
2909 /* |
1341 /* |
2910 * > A start tag whose tag name is "hr" |
1342 * These tags require special handling in the 'in body' insertion mode |
2911 */ |
1343 * but that handling hasn't yet been implemented. |
2912 case '+HR': |
1344 * |
2913 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
1345 * As the rules for each tag are implemented, the corresponding tag |
2914 $this->close_a_p_element(); |
1346 * name should be removed from this list. An accompanying test should |
2915 } |
1347 * help ensure this list is maintained. |
2916 $this->insert_html_element( $this->state->current_token ); |
1348 * |
2917 $this->state->frameset_ok = false; |
1349 * @see Tests_HtmlApi_WpHtmlProcessor::test_step_in_body_fails_on_unsupported_tags |
2918 return true; |
1350 * |
2919 |
1351 * Since this switch structure throws a WP_HTML_Unsupported_Exception, it's |
2920 /* |
1352 * possible to handle "any other start tag" and "any other end tag" below, |
2921 * > A start tag whose tag name is "image" |
1353 * as that guarantees execution doesn't proceed for the unimplemented tags. |
2922 */ |
1354 * |
2923 case '+IMAGE': |
1355 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inbody |
2924 /* |
1356 */ |
2925 * > Parse error. Change the token's tag name to "img" and reprocess it. (Don't ask.) |
1357 switch ( $token_name ) { |
2926 * |
1358 case 'APPLET': |
2927 * Note that this is handled elsewhere, so it should not be possible to reach this code. |
1359 case 'BASE': |
2928 */ |
1360 case 'BASEFONT': |
2929 $this->bail( "Cannot process an IMAGE tag. (Don't ask.)" ); |
1361 case 'BGSOUND': |
2930 break; |
1362 case 'BODY': |
2931 |
1363 case 'CAPTION': |
2932 /* |
1364 case 'COL': |
2933 * > A start tag whose tag name is "textarea" |
1365 case 'COLGROUP': |
2934 */ |
1366 case 'FORM': |
2935 case '+TEXTAREA': |
1367 case 'FRAME': |
2936 $this->insert_html_element( $this->state->current_token ); |
1368 case 'FRAMESET': |
2937 |
1369 case 'HEAD': |
2938 /* |
1370 case 'HTML': |
2939 * > If the next token is a U+000A LINE FEED (LF) character token, then ignore |
1371 case 'IFRAME': |
2940 * > that token and move on to the next one. (Newlines at the start of |
1372 case 'LINK': |
2941 * > textarea elements are ignored as an authoring convenience.) |
1373 case 'MARQUEE': |
2942 * |
1374 case 'MATH': |
2943 * This is handled in `get_modifiable_text()`. |
1375 case 'META': |
2944 */ |
1376 case 'NOBR': |
2945 |
1377 case 'NOEMBED': |
2946 $this->state->frameset_ok = false; |
1378 case 'NOFRAMES': |
2947 |
1379 case 'NOSCRIPT': |
2948 /* |
1380 case 'OBJECT': |
2949 * > Switch the insertion mode to "text". |
1381 case 'OPTGROUP': |
2950 * |
1382 case 'OPTION': |
2951 * As a self-contained node, this behavior is handled in the Tag Processor. |
1383 case 'PLAINTEXT': |
2952 */ |
1384 case 'RB': |
2953 return true; |
1385 case 'RP': |
2954 |
1386 case 'RT': |
2955 /* |
1387 case 'RTC': |
2956 * > A start tag whose tag name is "xmp" |
1388 case 'SARCASM': |
2957 */ |
1389 case 'SCRIPT': |
2958 case '+XMP': |
1390 case 'SELECT': |
2959 if ( $this->state->stack_of_open_elements->has_p_in_button_scope() ) { |
1391 case 'STYLE': |
2960 $this->close_a_p_element(); |
1392 case 'SVG': |
2961 } |
1393 case 'TABLE': |
2962 |
1394 case 'TBODY': |
2963 $this->reconstruct_active_formatting_elements(); |
1395 case 'TD': |
2964 $this->state->frameset_ok = false; |
1396 case 'TEMPLATE': |
2965 |
1397 case 'TEXTAREA': |
2966 /* |
1398 case 'TFOOT': |
2967 * > Follow the generic raw text element parsing algorithm. |
1399 case 'TH': |
2968 * |
1400 case 'THEAD': |
2969 * As a self-contained node, this behavior is handled in the Tag Processor. |
1401 case 'TITLE': |
2970 */ |
1402 case 'TR': |
2971 $this->insert_html_element( $this->state->current_token ); |
1403 case 'XMP': |
2972 return true; |
1404 $this->last_error = self::ERROR_UNSUPPORTED; |
2973 |
1405 throw new WP_HTML_Unsupported_Exception( "Cannot process {$token_name} element." ); |
2974 /* |
2975 * > A start tag whose tag name is "iframe" |
|
2976 */ |
|
2977 case '+IFRAME': |
|
2978 $this->state->frameset_ok = false; |
|
2979 |
|
2980 /* |
|
2981 * > Follow the generic raw text element parsing algorithm. |
|
2982 * |
|
2983 * As a self-contained node, this behavior is handled in the Tag Processor. |
|
2984 */ |
|
2985 $this->insert_html_element( $this->state->current_token ); |
|
2986 return true; |
|
2987 |
|
2988 /* |
|
2989 * > A start tag whose tag name is "noembed" |
|
2990 * > A start tag whose tag name is "noscript", if the scripting flag is enabled |
|
2991 * |
|
2992 * The scripting flag is never enabled in this parser. |
|
2993 */ |
|
2994 case '+NOEMBED': |
|
2995 $this->insert_html_element( $this->state->current_token ); |
|
2996 return true; |
|
2997 |
|
2998 /* |
|
2999 * > A start tag whose tag name is "select" |
|
3000 */ |
|
3001 case '+SELECT': |
|
3002 $this->reconstruct_active_formatting_elements(); |
|
3003 $this->insert_html_element( $this->state->current_token ); |
|
3004 $this->state->frameset_ok = false; |
|
3005 |
|
3006 switch ( $this->state->insertion_mode ) { |
|
3007 /* |
|
3008 * > If the insertion mode is one of "in table", "in caption", "in table body", "in row", |
|
3009 * > or "in cell", then switch the insertion mode to "in select in table". |
|
3010 */ |
|
3011 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE: |
|
3012 case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION: |
|
3013 case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY: |
|
3014 case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW: |
|
3015 case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL: |
|
3016 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; |
|
3017 break; |
|
3018 |
|
3019 /* |
|
3020 * > Otherwise, switch the insertion mode to "in select". |
|
3021 */ |
|
3022 default: |
|
3023 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; |
|
3024 break; |
|
3025 } |
|
3026 return true; |
|
3027 |
|
3028 /* |
|
3029 * > A start tag whose tag name is one of: "optgroup", "option" |
|
3030 */ |
|
3031 case '+OPTGROUP': |
|
3032 case '+OPTION': |
|
3033 if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) { |
|
3034 $this->state->stack_of_open_elements->pop(); |
|
3035 } |
|
3036 $this->reconstruct_active_formatting_elements(); |
|
3037 $this->insert_html_element( $this->state->current_token ); |
|
3038 return true; |
|
3039 |
|
3040 /* |
|
3041 * > A start tag whose tag name is one of: "rb", "rtc" |
|
3042 */ |
|
3043 case '+RB': |
|
3044 case '+RTC': |
|
3045 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { |
|
3046 $this->generate_implied_end_tags(); |
|
3047 |
|
3048 if ( $this->state->stack_of_open_elements->current_node_is( 'RUBY' ) ) { |
|
3049 // @todo Indicate a parse error once it's possible. |
|
3050 } |
|
3051 } |
|
3052 |
|
3053 $this->insert_html_element( $this->state->current_token ); |
|
3054 return true; |
|
3055 |
|
3056 /* |
|
3057 * > A start tag whose tag name is one of: "rp", "rt" |
|
3058 */ |
|
3059 case '+RP': |
|
3060 case '+RT': |
|
3061 if ( $this->state->stack_of_open_elements->has_element_in_scope( 'RUBY' ) ) { |
|
3062 $this->generate_implied_end_tags( 'RTC' ); |
|
3063 |
|
3064 $current_node_name = $this->state->stack_of_open_elements->current_node()->node_name; |
|
3065 if ( 'RTC' === $current_node_name || 'RUBY' === $current_node_name ) { |
|
3066 // @todo Indicate a parse error once it's possible. |
|
3067 } |
|
3068 } |
|
3069 |
|
3070 $this->insert_html_element( $this->state->current_token ); |
|
3071 return true; |
|
3072 |
|
3073 /* |
|
3074 * > A start tag whose tag name is "math" |
|
3075 */ |
|
3076 case '+MATH': |
|
3077 $this->reconstruct_active_formatting_elements(); |
|
3078 |
|
3079 /* |
|
3080 * @todo Adjust MathML attributes for the token. (This fixes the case of MathML attributes that are not all lowercase.) |
|
3081 * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink.) |
|
3082 * |
|
3083 * These ought to be handled in the attribute methods. |
|
3084 */ |
|
3085 $this->state->current_token->namespace = 'math'; |
|
3086 $this->insert_html_element( $this->state->current_token ); |
|
3087 if ( $this->state->current_token->has_self_closing_flag ) { |
|
3088 $this->state->stack_of_open_elements->pop(); |
|
3089 } |
|
3090 return true; |
|
3091 |
|
3092 /* |
|
3093 * > A start tag whose tag name is "svg" |
|
3094 */ |
|
3095 case '+SVG': |
|
3096 $this->reconstruct_active_formatting_elements(); |
|
3097 |
|
3098 /* |
|
3099 * @todo Adjust SVG attributes for the token. (This fixes the case of SVG attributes that are not all lowercase.) |
|
3100 * @todo Adjust foreign attributes for the token. (This fixes the use of namespaced attributes, in particular XLink in SVG.) |
|
3101 * |
|
3102 * These ought to be handled in the attribute methods. |
|
3103 */ |
|
3104 $this->state->current_token->namespace = 'svg'; |
|
3105 $this->insert_html_element( $this->state->current_token ); |
|
3106 if ( $this->state->current_token->has_self_closing_flag ) { |
|
3107 $this->state->stack_of_open_elements->pop(); |
|
3108 } |
|
3109 return true; |
|
3110 |
|
3111 /* |
|
3112 * > A start tag whose tag name is one of: "caption", "col", "colgroup", |
|
3113 * > "frame", "head", "tbody", "td", "tfoot", "th", "thead", "tr" |
|
3114 */ |
|
3115 case '+CAPTION': |
|
3116 case '+COL': |
|
3117 case '+COLGROUP': |
|
3118 case '+FRAME': |
|
3119 case '+HEAD': |
|
3120 case '+TBODY': |
|
3121 case '+TD': |
|
3122 case '+TFOOT': |
|
3123 case '+TH': |
|
3124 case '+THEAD': |
|
3125 case '+TR': |
|
3126 // Parse error. Ignore the token. |
|
3127 return $this->step(); |
|
1406 } |
3128 } |
1407 |
3129 |
1408 if ( ! parent::is_tag_closer() ) { |
3130 if ( ! parent::is_tag_closer() ) { |
1409 /* |
3131 /* |
1410 * > Any other start tag |
3132 * > Any other start tag |
1422 * it exists before reaching a special element, which provides a kind |
3144 * it exists before reaching a special element, which provides a kind |
1423 * of boundary in the stack. For example, a `</custom-tag>` should not |
3145 * of boundary in the stack. For example, a `</custom-tag>` should not |
1424 * close anything beyond its containing `P` or `DIV` element. |
3146 * close anything beyond its containing `P` or `DIV` element. |
1425 */ |
3147 */ |
1426 foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { |
3148 foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { |
1427 if ( $token_name === $node->node_name ) { |
3149 if ( 'html' === $node->namespace && $token_name === $node->node_name ) { |
1428 break; |
3150 break; |
1429 } |
3151 } |
1430 |
3152 |
1431 if ( self::is_special( $node->node_name ) ) { |
3153 if ( self::is_special( $node ) ) { |
1432 // This is a parse error, ignore the token. |
3154 // This is a parse error, ignore the token. |
1433 return $this->step(); |
3155 return $this->step(); |
1434 } |
3156 } |
1435 } |
3157 } |
1436 |
3158 |
1444 if ( $node === $item ) { |
3166 if ( $node === $item ) { |
1445 return true; |
3167 return true; |
1446 } |
3168 } |
1447 } |
3169 } |
1448 } |
3170 } |
3171 |
|
3172 $this->bail( 'Should not have been able to reach end of IN BODY processing. Check HTML API code.' ); |
|
3173 // This unnecessary return prevents tools from inaccurately reporting type errors. |
|
3174 return false; |
|
3175 } |
|
3176 |
|
/**
 * Parses next element in the 'in table' insertion mode.
 *
 * This internal function performs the 'in table' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens which would require foster parenting are not supported: non-whitespace
 * text directly inside table structure, and every token without a dedicated rule
 * below, are passed to `bail()`.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intable
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_table(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();

	// Only tags carry a sigil: "+" for openers, "-" for closers. Other tokens dispatch on their bare name.
	$op_sigil = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
	$op       = "{$op_sigil}{$token_name}";

	switch ( $op ) {
		/*
		 * > A character token, if the current node is table,
		 * > tbody, template, tfoot, thead, or tr element
		 */
		case '#text':
			$current_node      = $this->state->stack_of_open_elements->current_node();
			$current_node_name = $current_node ? $current_node->node_name : null;
			if (
				$current_node_name && (
					'TABLE' === $current_node_name ||
					'TBODY' === $current_node_name ||
					'TEMPLATE' === $current_node_name ||
					'TFOOT' === $current_node_name ||
					'THEAD' === $current_node_name ||
					'TR' === $current_node_name
				)
			) {
				/*
				 * If the text is empty after processing HTML entities and stripping
				 * U+0000 NULL bytes then ignore the token.
				 */
				if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) {
					return $this->step();
				}

				/*
				 * This follows the rules for "in table text" insertion mode.
				 *
				 * Whitespace-only text nodes are inserted in-place. Otherwise
				 * foster parenting is enabled and the nodes would be
				 * inserted out-of-place.
				 *
				 * > If any of the tokens in the pending table character tokens
				 * > list are character tokens that are not ASCII whitespace,
				 * > then this is a parse error: reprocess the character tokens
				 * > in the pending table character tokens list using the rules
				 * > given in the "anything else" entry in the "in table"
				 * > insertion mode.
				 * >
				 * > Otherwise, insert the characters given by the pending table
				 * > character tokens list.
				 *
				 * @see https://html.spec.whatwg.org/#parsing-main-intabletext
				 */
				if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
					$this->insert_html_element( $this->state->current_token );
					return true;
				}

				// Non-whitespace would trigger fostering, unsupported at this time.
				$this->bail( 'Foster parenting is not supported.' );
				break;
			}

			// Text under any other current node is handled by the "anything else" rule below.
			break;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "caption"
		 */
		case '+CAPTION':
			$this->state->stack_of_open_elements->clear_to_table_context();
			$this->state->active_formatting_elements->insert_marker();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION;
			return true;

		/*
		 * > A start tag whose tag name is "colgroup"
		 */
		case '+COLGROUP':
			$this->state->stack_of_open_elements->clear_to_table_context();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
			return true;

		/*
		 * > A start tag whose tag name is "col"
		 */
		case '+COL':
			$this->state->stack_of_open_elements->clear_to_table_context();

			/*
			 * > Insert an HTML element for a "colgroup" start tag token with no attributes,
			 * > then switch the insertion mode to "in column group".
			 */
			$this->insert_virtual_node( 'COLGROUP' );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP;
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > A start tag whose tag name is one of: "tbody", "tfoot", "thead"
		 */
		case '+TBODY':
		case '+TFOOT':
		case '+THEAD':
			$this->state->stack_of_open_elements->clear_to_table_context();
			$this->insert_html_element( $this->state->current_token );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
			return true;

		/*
		 * > A start tag whose tag name is one of: "td", "th", "tr"
		 */
		case '+TD':
		case '+TH':
		case '+TR':
			$this->state->stack_of_open_elements->clear_to_table_context();

			/*
			 * > Insert an HTML element for a "tbody" start tag token with no attributes,
			 * > then switch the insertion mode to "in table body".
			 */
			$this->insert_virtual_node( 'TBODY' );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > A start tag whose tag name is "table"
		 *
		 * This tag in the IN TABLE insertion mode is a parse error.
		 */
		case '+TABLE':
			if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) {
				return $this->step();
			}

			// Close the open table, then let the new insertion mode handle this same token.
			$this->state->stack_of_open_elements->pop_until( 'TABLE' );
			$this->reset_insertion_mode_appropriately();
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > An end tag whose tag name is "table"
		 */
		case '-TABLE':
			if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( 'TABLE' ) ) {
				// @todo Indicate a parse error once it's possible.
				return $this->step();
			}

			$this->state->stack_of_open_elements->pop_until( 'TABLE' );
			$this->reset_insertion_mode_appropriately();
			return true;

		/*
		 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"
		 */
		case '-BODY':
		case '-CAPTION':
		case '-COL':
		case '-COLGROUP':
		case '-HTML':
		case '-TBODY':
		case '-TD':
		case '-TFOOT':
		case '-TH':
		case '-THEAD':
		case '-TR':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is one of: "style", "script", "template"
		 * > An end tag whose tag name is "template"
		 */
		case '+STYLE':
		case '+SCRIPT':
		case '+TEMPLATE':
		case '-TEMPLATE':
			/*
			 * > Process the token using the rules for the "in head" insertion mode.
			 */
			return $this->step_in_head();

		/*
		 * > A start tag whose tag name is "input"
		 *
		 * > If the token does not have an attribute with the name "type", or if it does, but
		 * > that attribute's value is not an ASCII case-insensitive match for the string
		 * > "hidden", then: act as described in the "anything else" entry below.
		 */
		case '+INPUT':
			$type_attribute = $this->get_attribute( 'type' );
			if ( ! is_string( $type_attribute ) || 'hidden' !== strtolower( $type_attribute ) ) {
				goto anything_else;
			}
			// @todo Indicate a parse error once it's possible.
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is "form"
		 *
		 * This tag in the IN TABLE insertion mode is a parse error.
		 *
		 * NOTE(review): the specification ignores the token when a TEMPLATE element is
		 * anywhere on the stack of open elements, whereas this checks "in scope" only.
		 * Confirm whether a scope boundary can hide an enclosing TEMPLATE here.
		 */
		case '+FORM':
			if (
				$this->state->stack_of_open_elements->has_element_in_scope( 'TEMPLATE' ) ||
				isset( $this->state->form_element )
			) {
				return $this->step();
			}

			// This FORM is special because it immediately closes and cannot have other children.
			$this->insert_html_element( $this->state->current_token );
			$this->state->form_element = $this->state->current_token;
			$this->state->stack_of_open_elements->pop();
			return true;
	}

	/*
	 * > Anything else
	 * > Parse error. Enable foster parenting, process the token using the rules for the
	 * > "in body" insertion mode, and then disable foster parenting.
	 *
	 * @todo Indicate a parse error once it's possible.
	 */
	anything_else:
	$this->bail( 'Foster parenting is not supported.' );
	// This unnecessary return prevents tools from inaccurately reporting type errors.
	return false;
}
|
3432 |
|
/**
 * Parses next element in the 'in table text' insertion mode.
 *
 * This internal function performs the 'in table text' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * This is a stub: every call is passed straight to `bail()` because
 * the insertion mode is not supported.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intabletext
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_table_text(): bool {
	$this->bail( 'No support for parsing in the ' . WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT . ' state.' );
	// This unnecessary return prevents tools from inaccurately reporting type errors.
	return false;
}
|
3451 |
|
/**
 * Applies the 'in caption' insertion mode rules to the current token.
 *
 * This internal function implements the 'in caption' insertion mode
 * on behalf of the generalized WP_HTML_Processor::step() function.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-incaption
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_caption(): bool {
	$tag       = $this->get_tag();
	$direction = $this->is_tag_closer() ? '-' : '+';
	$operation = $direction . $tag;
	$stack     = $this->state->stack_of_open_elements;

	/*
	 * > An end tag whose tag name is one of: "body", "col", "colgroup", "html", "tbody", "td", "tfoot", "th", "thead", "tr"
	 */
	$ignored_closers = array( '-BODY', '-COL', '-COLGROUP', '-HTML', '-TBODY', '-TD', '-TFOOT', '-TH', '-THEAD', '-TR' );
	if ( in_array( $operation, $ignored_closers, true ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > An end tag whose tag name is "caption"
	 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "td", "tfoot", "th", "thead", "tr"
	 * > An end tag whose tag name is "table"
	 *
	 * All three rules close the CAPTION the same way; they differ only in
	 * whether the token is consumed or reprocessed afterwards.
	 */
	$caption_enders = array( '-CAPTION', '+CAPTION', '+COL', '+COLGROUP', '+TBODY', '+TD', '+TFOOT', '+TH', '+THEAD', '+TR', '-TABLE' );
	if ( ! in_array( $operation, $caption_enders, true ) ) {
		/*
		 * > Anything else
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		return $this->step_in_body();
	}

	if ( ! $stack->has_element_in_table_scope( 'CAPTION' ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	$this->generate_implied_end_tags();
	if ( ! $stack->current_node_is( 'CAPTION' ) ) {
		// @todo Indicate a parse error once it's possible.
	}

	$stack->pop_until( 'CAPTION' );
	$this->state->active_formatting_elements->clear_up_to_last_marker();
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;

	// Only the CAPTION end tag is consumed here; every other token is reprocessed.
	return '-CAPTION' === $operation
		? true
		: $this->step( self::REPROCESS_CURRENT_NODE );
}
|
3535 |
|
/**
 * Applies the 'in column group' insertion mode rules to the current token.
 *
 * This internal function implements the 'in column group' insertion mode
 * on behalf of the generalized WP_HTML_Processor::step() function.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-incolgroup
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_column_group(): bool {
	$name = $this->get_token_name();
	$type = $this->get_token_type();

	// Tags are prefixed with "+" (opener) or "-" (closer); other tokens use their bare name.
	$sigil = '';
	if ( '#tag' === $type ) {
		$sigil = parent::is_tag_closer() ? '-' : '+';
	}
	$operation = $sigil . $name;
	$stack     = $this->state->stack_of_open_elements;

	switch ( $operation ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 */
		case '#text':
			if ( parent::TEXT_IS_WHITESPACE !== $this->text_node_classification ) {
				// Any other text is handled by the "anything else" rule after the switch.
				break;
			}

			// Insert the character.
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// @todo Indicate a parse error once it's possible.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is "col"
		 */
		case '+COL':
			$this->insert_html_element( $this->state->current_token );
			$stack->pop();
			return true;

		/*
		 * > An end tag whose tag name is "colgroup"
		 */
		case '-COLGROUP':
			if ( ! $stack->current_node_is( 'COLGROUP' ) ) {
				// @todo Indicate a parse error once it's possible.
				return $this->step();
			}

			$stack->pop();
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
			return true;

		/*
		 * > An end tag whose tag name is "col"
		 */
		case '-COL':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "template"
		 * > An end tag whose tag name is "template"
		 */
		case '+TEMPLATE':
		case '-TEMPLATE':
			return $this->step_in_head();
	}

	/*
	 * > Anything else
	 *
	 * Close the COLGROUP if it is the current node and reprocess the
	 * token in the "in table" insertion mode; otherwise ignore the token.
	 */
	if ( ! $stack->current_node_is( 'COLGROUP' ) ) {
		// @todo Indicate a parse error once it's possible.
		return $this->step();
	}

	$stack->pop();
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
|
3642 |
|
/**
 * Applies the 'in table body' insertion mode rules to the current token.
 *
 * This internal function implements the 'in table body' insertion mode
 * on behalf of the generalized WP_HTML_Processor::step() function.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intbody
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_table_body(): bool {
	$tag       = $this->get_tag();
	$direction = $this->is_tag_closer() ? '-' : '+';
	$operation = $direction . $tag;
	$stack     = $this->state->stack_of_open_elements;

	/*
	 * > A start tag whose tag name is "tr"
	 */
	if ( '+TR' === $operation ) {
		$stack->clear_to_table_body_context();
		$this->insert_html_element( $this->state->current_token );
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
		return true;
	}

	/*
	 * > A start tag whose tag name is one of: "th", "td"
	 *
	 * A cell directly inside a table section implies a row: a virtual TR
	 * is inserted and the cell is then reprocessed in the "in row" mode.
	 */
	if ( '+TH' === $operation || '+TD' === $operation ) {
		// @todo Indicate a parse error once it's possible.
		$stack->clear_to_table_body_context();
		$this->insert_virtual_node( 'TR' );
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > An end tag whose tag name is one of: "tbody", "tfoot", "thead"
	 */
	if ( in_array( $operation, array( '-TBODY', '-TFOOT', '-THEAD' ), true ) ) {
		if ( ! $stack->has_element_in_table_scope( $tag ) ) {
			// Parse error: ignore the token.
			return $this->step();
		}

		$stack->clear_to_table_body_context();
		$stack->pop();
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
		return true;
	}

	/*
	 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead"
	 * > An end tag whose tag name is "table"
	 */
	$section_enders = array( '+CAPTION', '+COL', '+COLGROUP', '+TBODY', '+TFOOT', '+THEAD', '-TABLE' );
	if ( in_array( $operation, $section_enders, true ) ) {
		$has_open_section = (
			$stack->has_element_in_table_scope( 'TBODY' ) ||
			$stack->has_element_in_table_scope( 'THEAD' ) ||
			$stack->has_element_in_table_scope( 'TFOOT' )
		);

		if ( ! $has_open_section ) {
			// Parse error: ignore the token.
			return $this->step();
		}

		// Close the open section and let the "in table" mode handle this same token.
		$stack->clear_to_table_body_context();
		$stack->pop();
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE;
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th", "tr"
	 */
	$ignored_closers = array( '-BODY', '-CAPTION', '-COL', '-COLGROUP', '-HTML', '-TD', '-TH', '-TR' );
	if ( in_array( $operation, $ignored_closers, true ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > Anything else
	 * > Process the token using the rules for the "in table" insertion mode.
	 */
	return $this->step_in_table();
}
|
3745 |
|
/**
 * Applies the 'in row' insertion mode rules to the current token.
 *
 * This internal function implements the 'in row' insertion mode
 * on behalf of the generalized WP_HTML_Processor::step() function.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intr
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_row(): bool {
	$tag       = $this->get_tag();
	$direction = $this->is_tag_closer() ? '-' : '+';
	$operation = $direction . $tag;
	$stack     = $this->state->stack_of_open_elements;

	/*
	 * > A start tag whose tag name is one of: "th", "td"
	 */
	if ( '+TH' === $operation || '+TD' === $operation ) {
		$stack->clear_to_table_row_context();
		$this->insert_html_element( $this->state->current_token );
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL;
		$this->state->active_formatting_elements->insert_marker();
		return true;
	}

	/*
	 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html", "td", "th"
	 */
	$ignored_closers = array( '-BODY', '-CAPTION', '-COL', '-COLGROUP', '-HTML', '-TD', '-TH' );
	if ( in_array( $operation, $ignored_closers, true ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * The remaining rules all close the open TR in the same way:
	 *
	 * > An end tag whose tag name is "tr"
	 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "tfoot", "thead", "tr"
	 * > An end tag whose tag name is "table"
	 * > An end tag whose tag name is one of: "tbody", "tfoot", "thead"
	 */
	$is_row_closer     = '-TR' === $operation;
	$is_section_closer = in_array( $operation, array( '-TBODY', '-TFOOT', '-THEAD' ), true );
	$is_row_ender      = in_array(
		$operation,
		array( '+CAPTION', '+COL', '+COLGROUP', '+TBODY', '+TFOOT', '+THEAD', '+TR', '-TABLE' ),
		true
	);

	if ( ! $is_row_closer && ! $is_section_closer && ! $is_row_ender ) {
		/*
		 * > Anything else
		 * > Process the token using the rules for the "in table" insertion mode.
		 */
		return $this->step_in_table();
	}

	// A section end tag is only honored when that section is itself open.
	if ( $is_section_closer && ! $stack->has_element_in_table_scope( $tag ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	if ( ! $stack->has_element_in_table_scope( 'TR' ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	$stack->clear_to_table_row_context();
	$stack->pop();
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY;

	// Only the TR end tag is consumed here; every other token is reprocessed.
	if ( $is_row_closer ) {
		return true;
	}

	return $this->step( self::REPROCESS_CURRENT_NODE );
}
|
3855 |
|
/**
 * Applies the 'in cell' insertion mode rules to the current token.
 *
 * This internal function implements the 'in cell' insertion mode
 * on behalf of the generalized WP_HTML_Processor::step() function.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intd
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_cell(): bool {
	$tag       = $this->get_tag();
	$direction = $this->is_tag_closer() ? '-' : '+';
	$operation = $direction . $tag;
	$stack     = $this->state->stack_of_open_elements;

	/*
	 * > An end tag whose tag name is one of: "td", "th"
	 */
	if ( '-TD' === $operation || '-TH' === $operation ) {
		if ( ! $stack->has_element_in_table_scope( $tag ) ) {
			// Parse error: ignore the token.
			return $this->step();
		}

		$this->generate_implied_end_tags();

		/*
		 * @todo This needs to check if the current node is an HTML element, meaning that
		 *       when SVG and MathML support is added, this needs to differentiate between an
		 *       HTML element of the given name, such as `<center>`, and a foreign element of
		 *       the same given name.
		 */
		if ( ! $stack->current_node_is( $tag ) ) {
			// @todo Indicate a parse error once it's possible.
		}

		$stack->pop_until( $tag );
		$this->state->active_formatting_elements->clear_up_to_last_marker();
		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW;
		return true;
	}

	/*
	 * > A start tag whose tag name is one of: "caption", "col", "colgroup", "tbody", "td",
	 * > "tfoot", "th", "thead", "tr"
	 */
	$cell_enders = array( '+CAPTION', '+COL', '+COLGROUP', '+TBODY', '+TD', '+TFOOT', '+TH', '+THEAD', '+TR' );
	if ( in_array( $operation, $cell_enders, true ) ) {
		/*
		 * > Assert: The stack of open elements has a td or th element in table scope.
		 *
		 * Nothing to do here, except to verify in tests that this never appears.
		 */
		$this->close_cell();
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > An end tag whose tag name is one of: "body", "caption", "col", "colgroup", "html"
	 */
	$ignored_closers = array( '-BODY', '-CAPTION', '-COL', '-COLGROUP', '-HTML' );
	if ( in_array( $operation, $ignored_closers, true ) ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > An end tag whose tag name is one of: "table", "tbody", "tfoot", "thead", "tr"
	 */
	$table_closers = array( '-TABLE', '-TBODY', '-TFOOT', '-THEAD', '-TR' );
	if ( in_array( $operation, $table_closers, true ) ) {
		if ( ! $stack->has_element_in_table_scope( $tag ) ) {
			// Parse error: ignore the token.
			return $this->step();
		}

		$this->close_cell();
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > Anything else
	 * > Process the token using the rules for the "in body" insertion mode.
	 */
	return $this->step_in_body();
}
|
3959 |
|
/**
 * Parses next element in the 'in select' insertion mode.
 *
 * This internal function performs the 'in select' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * The current token is reduced to an operation string: "+NAME" for an
 * opening tag, "-NAME" for a closing tag, and the bare token name for
 * every non-tag token (e.g. "#text", "#comment", "html" for a DOCTYPE).
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/multipage/parsing.html#parsing-main-inselect
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_select(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();
	$op_sigil   = '#tag' === $token_type ? ( parent::is_tag_closer() ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$token_name}";

	switch ( $op ) {
		/*
		 * > Any other character token
		 */
		case '#text':
			/*
			 * > A character token that is U+0000 NULL
			 *
			 * If a text node only comprises null bytes then it should be
			 * entirely ignored and should not return to calling code.
			 */
			if ( parent::TEXT_IS_NULL_SEQUENCE === $this->text_node_classification ) {
				// Parse error: ignore the token.
				return $this->step();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is "option"
		 */
		case '+OPTION':
			// A new OPTION implicitly closes an OPTION that is still open.
			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
				$this->state->stack_of_open_elements->pop();
			}
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is "optgroup"
		 * > A start tag whose tag name is "hr"
		 *
		 * These rules are identical except for the treatment of the self-closing flag and
		 * the subsequent pop of the HR void element, all of which is handled elsewhere in the processor.
		 */
		case '+OPTGROUP':
		case '+HR':
			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
				$this->state->stack_of_open_elements->pop();
			}

			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTGROUP' ) ) {
				$this->state->stack_of_open_elements->pop();
			}

			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > An end tag whose tag name is "optgroup"
		 */
		case '-OPTGROUP':
			$current_node = $this->state->stack_of_open_elements->current_node();
			if ( $current_node && 'OPTION' === $current_node->node_name ) {
				/*
				 * Only the first node yielded by walk_up() is wanted. The variable
				 * is initialized up front so that it is defined even when the walk
				 * yields nothing; otherwise the check below would read an
				 * undefined variable.
				 */
				$parent = null;
				foreach ( $this->state->stack_of_open_elements->walk_up( $current_node ) as $parent ) {
					break;
				}

				// An OPTION directly inside an OPTGROUP is closed together with the group.
				if ( $parent && 'OPTGROUP' === $parent->node_name ) {
					$this->state->stack_of_open_elements->pop();
				}
			}

			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTGROUP' ) ) {
				$this->state->stack_of_open_elements->pop();
				return true;
			}

			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > An end tag whose tag name is "option"
		 */
		case '-OPTION':
			if ( $this->state->stack_of_open_elements->current_node_is( 'OPTION' ) ) {
				$this->state->stack_of_open_elements->pop();
				return true;
			}

			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > An end tag whose tag name is "select"
		 * > A start tag whose tag name is "select"
		 *
		 * > It just gets treated like an end tag.
		 */
		case '-SELECT':
		case '+SELECT':
			if ( ! $this->state->stack_of_open_elements->has_element_in_select_scope( 'SELECT' ) ) {
				// Parse error: ignore the token.
				return $this->step();
			}
			$this->state->stack_of_open_elements->pop_until( 'SELECT' );
			$this->reset_insertion_mode_appropriately();
			return true;

		/*
		 * > A start tag whose tag name is one of: "input", "keygen", "textarea"
		 *
		 * All three of these tags are considered a parse error when found in this insertion mode.
		 */
		case '+INPUT':
		case '+KEYGEN':
		case '+TEXTAREA':
			if ( ! $this->state->stack_of_open_elements->has_element_in_select_scope( 'SELECT' ) ) {
				// Ignore the token.
				return $this->step();
			}

			// Close the SELECT, then let the new insertion mode handle this same token.
			$this->state->stack_of_open_elements->pop_until( 'SELECT' );
			$this->reset_insertion_mode_appropriately();
			return $this->step( self::REPROCESS_CURRENT_NODE );

		/*
		 * > A start tag whose tag name is one of: "script", "template"
		 * > An end tag whose tag name is "template"
		 */
		case '+SCRIPT':
		case '+TEMPLATE':
		case '-TEMPLATE':
			return $this->step_in_head();
	}

	/*
	 * > Anything else
	 * >   Parse error: ignore the token.
	 */
	return $this->step();
}
|
4134 |
|
/**
 * Parses next element in the 'in select in table' insertion mode.
 *
 * This internal function performs the 'in select in table' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Table-structure tags pop the open elements up to SELECT and are then
 * reprocessed under the recalculated insertion mode; every other token
 * is delegated to the 'in select' rules.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-inselectintable
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_select_in_table(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();

	// Only tag tokens carry a "+" (opener) or "-" (closer) prefix.
	$op_sigil = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = parent::is_tag_closer() ? '-' : '+';
	}
	$op = $op_sigil . $token_name;

	/*
	 * > A start tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
	 */
	$table_openers = array(
		'+CAPTION',
		'+TABLE',
		'+TBODY',
		'+TFOOT',
		'+THEAD',
		'+TR',
		'+TD',
		'+TH',
	);
	if ( in_array( $op, $table_openers, true ) ) {
		// @todo Indicate a parse error once it's possible.
		$this->state->stack_of_open_elements->pop_until( 'SELECT' );
		$this->reset_insertion_mode_appropriately();
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > An end tag whose tag name is one of: "caption", "table", "tbody", "tfoot", "thead", "tr", "td", "th"
	 */
	$table_closers = array(
		'-CAPTION',
		'-TABLE',
		'-TBODY',
		'-TFOOT',
		'-THEAD',
		'-TR',
		'-TD',
		'-TH',
	);
	if ( in_array( $op, $table_closers, true ) ) {
		// @todo Indicate a parse error once it's possible.
		if ( ! $this->state->stack_of_open_elements->has_element_in_table_scope( $token_name ) ) {
			// No matching element in table scope: skip this closer.
			return $this->step();
		}

		$this->state->stack_of_open_elements->pop_until( 'SELECT' );
		$this->reset_insertion_mode_appropriately();
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > Anything else
	 */
	return $this->step_in_select();
}
|
4198 |
|
/**
 * Parses next element in the 'in template' insertion mode.
 *
 * This internal function performs the 'in template' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Most start tags replace the top entry of the stack of template
 * insertion modes, switch the processor into that mode, and reprocess
 * the same token there.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-intemplate
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_template(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();
	$is_closer  = $this->is_tag_closer();

	$op_sigil = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = $is_closer ? '-' : '+';
	}
	$op = $op_sigil . $token_name;

	/*
	 * > A character token
	 * > A comment token
	 * > A DOCTYPE token
	 */
	$in_body_tokens = array( '#text', '#comment', '#funky-comment', '#presumptuous-tag', 'html' );
	if ( in_array( $op, $in_body_tokens, true ) ) {
		return $this->step_in_body();
	}

	/*
	 * > A start tag whose tag name is one of: "base", "basefont", "bgsound", "link",
	 * > "meta", "noframes", "script", "style", "template", "title"
	 * > An end tag whose tag name is "template"
	 */
	$in_head_tokens = array(
		'+BASE',
		'+BASEFONT',
		'+BGSOUND',
		'+LINK',
		'+META',
		'+NOFRAMES',
		'+SCRIPT',
		'+STYLE',
		'+TEMPLATE',
		'+TITLE',
		'-TEMPLATE',
	);
	if ( in_array( $op, $in_head_tokens, true ) ) {
		return $this->step_in_head();
	}

	/*
	 * Start tags which select a specific replacement insertion mode:
	 *
	 * > A start tag whose tag name is one of: "caption", "colgroup", "tbody", "tfoot", "thead"
	 * > A start tag whose tag name is "col"
	 * > A start tag whose tag name is "tr"
	 * > A start tag whose tag name is one of: "td", "th"
	 */
	$mode_for_opener = array(
		'+CAPTION'  => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE,
		'+COLGROUP' => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE,
		'+TBODY'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE,
		'+TFOOT'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE,
		'+THEAD'    => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE,
		'+COL'      => WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP,
		'+TR'       => WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY,
		'+TD'       => WP_HTML_Processor_State::INSERTION_MODE_IN_ROW,
		'+TH'       => WP_HTML_Processor_State::INSERTION_MODE_IN_ROW,
	);

	$has_replacement_mode = false;
	$replacement_mode     = null;
	if ( array_key_exists( $op, $mode_for_opener ) ) {
		$has_replacement_mode = true;
		$replacement_mode     = $mode_for_opener[ $op ];
	} elseif ( ! $is_closer ) {
		/*
		 * > Any other start tag
		 */
		$has_replacement_mode = true;
		$replacement_mode     = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	}

	if ( $has_replacement_mode ) {
		// Swap the current template insertion mode, then reprocess the token in the new mode.
		array_pop( $this->state->stack_of_template_insertion_modes );
		$this->state->stack_of_template_insertion_modes[] = $replacement_mode;
		$this->state->insertion_mode                      = $replacement_mode;
		return $this->step( self::REPROCESS_CURRENT_NODE );
	}

	/*
	 * > Any other end tag
	 */
	if ( $is_closer ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > An end-of-file token
	 *
	 * NOTE(review): every path above returns when the token is, or is not,
	 * a closer, so this section appears to be unreachable as written.
	 * Confirm how end-of-file is meant to arrive here.
	 */
	if ( ! $this->state->stack_of_open_elements->contains( 'TEMPLATE' ) ) {
		// Stop parsing.
		return false;
	}

	// @todo Indicate a parse error once it's possible.
	$this->state->stack_of_open_elements->pop_until( 'TEMPLATE' );
	$this->state->active_formatting_elements->clear_up_to_last_marker();
	array_pop( $this->state->stack_of_template_insertion_modes );
	$this->reset_insertion_mode_appropriately();
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
|
4327 |
|
/**
 * Parses next element in the 'after body' insertion mode.
 *
 * This internal function performs the 'after body' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule switch the processor back to the
 * 'in body' insertion mode and are reprocessed there.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-afterbody
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_body(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();

	$op_sigil = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = $op_sigil . $token_name;

	if ( '#text' === $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 *
		 * Any other text continues on to the "anything else" rule below.
		 */
		if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
			return $this->step_in_body();
		}
	} elseif ( '#comment' === $op || '#funky-comment' === $op || '#presumptuous-tag' === $op ) {
		/*
		 * > A comment token
		 */
		$this->bail( 'Content outside of BODY is unsupported.' );
	} elseif ( 'html' === $op ) {
		/*
		 * > A DOCTYPE token
		 */
		// Parse error: ignore the token.
		return $this->step();
	} elseif ( '+HTML' === $op ) {
		/*
		 * > A start tag whose tag name is "html"
		 */
		return $this->step_in_body();
	} elseif ( '-HTML' === $op ) {
		/*
		 * > An end tag whose tag name is "html"
		 *
		 * > If the parser was created as part of the HTML fragment parsing algorithm,
		 * > this is a parse error; ignore the token. (fragment case)
		 * >
		 * > Otherwise, switch the insertion mode to "after after body".
		 */
		if ( isset( $this->context_node ) ) {
			return $this->step();
		}

		$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY;

		/*
		 * The HTML element is not removed from the stack of open elements.
		 * Only internal state has changed, this does not qualify as a "step"
		 * in terms of advancing through the document to another token.
		 * Nothing has been pushed or popped.
		 * Proceed to parse the next item.
		 */
		return $this->step();
	}

	/*
	 * > Anything else
	 * >   Parse error. Switch the insertion mode to "in body" and reprocess the token.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
|
4416 |
|
/**
 * Parses next element in the 'in frameset' insertion mode.
 *
 * This internal function performs the 'in frameset' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule are treated as parse errors and
 * skipped by advancing to the next token.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-inframeset
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_frameset(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();

	$op_sigil = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = $op_sigil . $token_name;

	/*
	 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
	 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
	 * >
	 * > Insert the character.
	 *
	 * This algorithm effectively strips non-whitespace characters from text and inserts
	 * them under HTML. This is not supported at this time.
	 */
	if ( '#text' === $op ) {
		if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
			return $this->step_in_body();
		}
		$this->bail( 'Non-whitespace characters cannot be handled in frameset.' );
	}

	/*
	 * > A comment token
	 */
	if ( '#comment' === $op || '#funky-comment' === $op || '#presumptuous-tag' === $op ) {
		$this->insert_html_element( $this->state->current_token );
		return true;
	}

	/*
	 * > A DOCTYPE token
	 */
	if ( 'html' === $op ) {
		// Parse error: ignore the token.
		return $this->step();
	}

	/*
	 * > A start tag whose tag name is "html"
	 */
	if ( '+HTML' === $op ) {
		return $this->step_in_body();
	}

	/*
	 * > A start tag whose tag name is "frameset"
	 */
	if ( '+FRAMESET' === $op ) {
		$this->insert_html_element( $this->state->current_token );
		return true;
	}

	/*
	 * > An end tag whose tag name is "frameset"
	 */
	if ( '-FRAMESET' === $op ) {
		/*
		 * > If the current node is the root html element, then this is a parse error;
		 * > ignore the token. (fragment case)
		 */
		if ( $this->state->stack_of_open_elements->current_node_is( 'HTML' ) ) {
			return $this->step();
		}

		/*
		 * > Otherwise, pop the current node from the stack of open elements.
		 */
		$this->state->stack_of_open_elements->pop();

		/*
		 * > If the parser was not created as part of the HTML fragment parsing algorithm
		 * > (fragment case), and the current node is no longer a frameset element, then
		 * > switch the insertion mode to "after frameset".
		 */
		$is_full_document = ! isset( $this->context_node );
		if ( $is_full_document && ! $this->state->stack_of_open_elements->current_node_is( 'FRAMESET' ) ) {
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET;
		}

		return true;
	}

	/*
	 * > A start tag whose tag name is "frame"
	 *
	 * > Insert an HTML element for the token. Immediately pop the
	 * > current node off the stack of open elements.
	 * >
	 * > Acknowledge the token's self-closing flag, if it is set.
	 */
	if ( '+FRAME' === $op ) {
		$this->insert_html_element( $this->state->current_token );
		$this->state->stack_of_open_elements->pop();
		return true;
	}

	/*
	 * > A start tag whose tag name is "noframes"
	 */
	if ( '+NOFRAMES' === $op ) {
		return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}
|
4535 |
|
/**
 * Parses next element in the 'after frameset' insertion mode.
 *
 * This internal function performs the 'after frameset' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule are treated as parse errors and
 * skipped by advancing to the next token.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-afterframeset
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_frameset(): bool {
	$token_name = $this->get_token_name();
	$is_tag     = '#tag' === $this->get_token_type();

	// Tag tokens get a "+" (opener) or "-" (closer) prefix; other tokens use the bare name.
	if ( $is_tag ) {
		$op = ( $this->is_tag_closer() ? '-' : '+' ) . $token_name;
	} else {
		$op = '' . $token_name;
	}

	switch ( $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Insert the character.
		 *
		 * This algorithm effectively strips non-whitespace characters from text and inserts
		 * them under HTML. This is not supported at this time.
		 */
		case '#text':
			if ( parent::TEXT_IS_WHITESPACE !== $this->text_node_classification ) {
				$this->bail( 'Non-whitespace characters cannot be handled in after frameset' );
				break;
			}
			return $this->step_in_body();

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_html_element( $this->state->current_token );
			return true;

		/*
		 * > A start tag whose tag name is "html"
		 */
		case '+HTML':
			return $this->step_in_body();

		/*
		 * > A start tag whose tag name is "noframes"
		 */
		case '+NOFRAMES':
			return $this->step_in_head();

		/*
		 * > An end tag whose tag name is "html"
		 *
		 * The HTML element is not removed from the stack of open elements.
		 * Only internal state has changed, this does not qualify as a "step"
		 * in terms of advancing through the document to another token.
		 * Nothing has been pushed or popped.
		 * Proceed to parse the next item.
		 */
		case '-HTML':
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET;
			return $this->step();

		/*
		 * > A DOCTYPE token
		 *
		 * Parse error: ignore the token, which is what the shared
		 * fallback after this switch does.
		 */
		case 'html':
			break;
	}

	// Parse error: ignore the token.
	return $this->step();
}
|
4620 |
|
/**
 * Parses next element in the 'after after body' insertion mode.
 *
 * This internal function performs the 'after after body' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule switch the processor back to the
 * 'in body' insertion mode and are reprocessed there.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#the-after-after-body-insertion-mode
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_after_body(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();

	$op_sigil = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = $op_sigil . $token_name;

	if ( '#comment' === $op || '#funky-comment' === $op || '#presumptuous-tag' === $op ) {
		/*
		 * > A comment token
		 */
		$this->bail( 'Content outside of HTML is unsupported.' );
	} elseif ( 'html' === $op || '+HTML' === $op ) {
		/*
		 * > A DOCTYPE token
		 * > A start tag whose tag name is "html"
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		return $this->step_in_body();
	} elseif ( '#text' === $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Process the token using the rules for the "in body" insertion mode.
		 *
		 * Any other text continues on to the "anything else" rule below.
		 */
		if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
			return $this->step_in_body();
		}
	}

	/*
	 * > Anything else
	 * >   Parse error. Switch the insertion mode to "in body" and reprocess the token.
	 */
	$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY;
	return $this->step( self::REPROCESS_CURRENT_NODE );
}
|
4683 |
|
/**
 * Parses next element in the 'after after frameset' insertion mode.
 *
 * This internal function performs the 'after after frameset' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens without a dedicated rule are treated as parse errors and
 * skipped by advancing to the next token.
 *
 * @since 6.7.0 Stub implementation.
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#the-after-after-frameset-insertion-mode
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_after_after_frameset(): bool {
	$token_name = $this->get_token_name();
	$token_type = $this->get_token_type();

	$op_sigil = '';
	if ( '#tag' === $token_type ) {
		$op_sigil = $this->is_tag_closer() ? '-' : '+';
	}
	$op = $op_sigil . $token_name;

	if ( '#comment' === $op || '#funky-comment' === $op || '#presumptuous-tag' === $op ) {
		/*
		 * > A comment token
		 */
		$this->bail( 'Content outside of HTML is unsupported.' );
	} elseif ( 'html' === $op || '+HTML' === $op ) {
		/*
		 * > A DOCTYPE token
		 * > A start tag whose tag name is "html"
		 *
		 * > Process the token using the rules for the "in body" insertion mode.
		 */
		return $this->step_in_body();
	} elseif ( '#text' === $op ) {
		/*
		 * > A character token that is one of U+0009 CHARACTER TABULATION, U+000A LINE FEED (LF),
		 * > U+000C FORM FEED (FF), U+000D CARRIAGE RETURN (CR), or U+0020 SPACE
		 * >
		 * > Process the token using the rules for the "in body" insertion mode.
		 *
		 * This algorithm effectively strips non-whitespace characters from text and inserts
		 * them under HTML. This is not supported at this time.
		 */
		if ( parent::TEXT_IS_WHITESPACE === $this->text_node_classification ) {
			return $this->step_in_body();
		}
		$this->bail( 'Non-whitespace characters cannot be handled in after after frameset.' );
	} elseif ( '+NOFRAMES' === $op ) {
		/*
		 * > A start tag whose tag name is "noframes"
		 */
		return $this->step_in_head();
	}

	// Parse error: ignore the token.
	return $this->step();
}
|
4751 |
|
/**
 * Parses next element in the 'in foreign content' insertion mode.
 *
 * This internal function performs the 'in foreign content' insertion mode
 * logic for the generalized WP_HTML_Processor::step() function.
 *
 * Tokens are first reduced to an "operation" string: the token name for
 * non-tag tokens, or the tag name prefixed with `+` (opener) or `-` (closer).
 * HTML "breakout" tags pop foreign elements off the stack of open elements
 * and are then re-processed by the step function for the current insertion mode.
 *
 * @since 6.7.0
 *
 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input.
 *
 * @see https://html.spec.whatwg.org/#parsing-main-inforeign
 * @see WP_HTML_Processor::step
 *
 * @return bool Whether an element was found.
 */
private function step_in_foreign_content(): bool {
	$tag_name   = $this->get_token_name();
	$token_type = $this->get_token_type();
	$is_closer  = $this->is_tag_closer();
	$op_sigil   = '#tag' === $token_type ? ( $is_closer ? '-' : '+' ) : '';
	$op         = "{$op_sigil}{$tag_name}";

	/*
	 * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size"
	 *
	 * This section drawn out above the switch to more easily incorporate
	 * the additional rules based on the presence of the attributes.
	 */
	if (
		'+FONT' === $op &&
		(
			null !== $this->get_attribute( 'color' ) ||
			null !== $this->get_attribute( 'face' ) ||
			null !== $this->get_attribute( 'size' )
		)
	) {
		$op = '+FONT with attributes';
	}

	switch ( $op ) {
		case '#text':
			/*
			 * > A character token that is U+0000 NULL
			 *
			 * This is handled by `get_modifiable_text()`.
			 */

			/*
			 * Whitespace-only text does not affect the frameset-ok flag.
			 * It is probably inter-element whitespace, but it may also
			 * contain character references which decode only to whitespace.
			 */
			if ( parent::TEXT_IS_GENERIC === $this->text_node_classification ) {
				$this->state->frameset_ok = false;
			}

			$this->insert_foreign_element( $this->state->current_token, false );
			return true;

		/*
		 * CDATA sections are alternate wrappers for text content and therefore
		 * ought to follow the same rules as text nodes.
		 */
		case '#cdata-section':
			/*
			 * NULL bytes and whitespace do not change the frameset-ok flag.
			 *
			 * The content span skips the 9-byte `<![CDATA[` opener and
			 * excludes it plus the 3-byte `]]>` terminator from the length.
			 */
			$current_token        = $this->bookmarks[ $this->state->current_token->bookmark_name ];
			$cdata_content_start  = $current_token->start + 9;
			$cdata_content_length = $current_token->length - 12;
			if ( strspn( $this->html, "\0 \t\n\f\r", $cdata_content_start, $cdata_content_length ) !== $cdata_content_length ) {
				$this->state->frameset_ok = false;
			}

			$this->insert_foreign_element( $this->state->current_token, false );
			return true;

		/*
		 * > A comment token
		 */
		case '#comment':
		case '#funky-comment':
		case '#presumptuous-tag':
			$this->insert_foreign_element( $this->state->current_token, false );
			return true;

		/*
		 * > A DOCTYPE token
		 */
		case 'html':
			// Parse error: ignore the token.
			return $this->step();

		/*
		 * > A start tag whose tag name is "b", "big", "blockquote", "body", "br", "center",
		 * > "code", "dd", "div", "dl", "dt", "em", "embed", "h1", "h2", "h3", "h4", "h5",
		 * > "h6", "head", "hr", "i", "img", "li", "listing", "menu", "meta", "nobr", "ol",
		 * > "p", "pre", "ruby", "s", "small", "span", "strong", "strike", "sub", "sup",
		 * > "table", "tt", "u", "ul", "var"
		 *
		 * > A start tag whose name is "font", if the token has any attributes named "color", "face", or "size"
		 *
		 * > An end tag whose tag name is "br", "p"
		 *
		 * Closing BR tags are always reported by the Tag Processor as opening tags.
		 */
		case '+B':
		case '+BIG':
		case '+BLOCKQUOTE':
		case '+BODY':
		case '+BR':
		case '+CENTER':
		case '+CODE':
		case '+DD':
		case '+DIV':
		case '+DL':
		case '+DT':
		case '+EM':
		case '+EMBED':
		case '+H1':
		case '+H2':
		case '+H3':
		case '+H4':
		case '+H5':
		case '+H6':
		case '+HEAD':
		case '+HR':
		case '+I':
		case '+IMG':
		case '+LI':
		case '+LISTING':
		case '+MENU':
		case '+META':
		case '+NOBR':
		case '+OL':
		case '+P':
		case '+PRE':
		case '+RUBY':
		case '+S':
		case '+SMALL':
		case '+SPAN':
		case '+STRONG':
		case '+STRIKE':
		case '+SUB':
		case '+SUP':
		case '+TABLE':
		case '+TT':
		case '+U':
		case '+UL':
		case '+VAR':
		case '+FONT with attributes':
		case '-BR':
		case '-P':
			// @todo Indicate a parse error once it's possible.

			// Pop foreign elements until reaching an integration point or an HTML element.
			foreach ( $this->state->stack_of_open_elements->walk_up() as $current_node ) {
				if (
					'math' === $current_node->integration_node_type ||
					'html' === $current_node->integration_node_type ||
					'html' === $current_node->namespace
				) {
					break;
				}

				$this->state->stack_of_open_elements->pop();
			}
			goto in_foreign_content_process_in_current_insertion_mode;
	}

	/*
	 * > Any other start tag
	 */
	if ( ! $is_closer ) {
		$this->insert_foreign_element( $this->state->current_token, false );

		/*
		 * > If the token has its self-closing flag set, then run
		 * > the appropriate steps from the following list:
		 * >
		 * >   ↪ the token's tag name is "script", and the new current node is in the SVG namespace
		 * >         Acknowledge the token's self-closing flag, and then act as
		 * >         described in the steps for a "script" end tag below.
		 * >
		 * >   ↪ Otherwise
		 * >         Pop the current node off the stack of open elements and
		 * >         acknowledge the token's self-closing flag.
		 *
		 * Since the rules for SCRIPT below indicate to pop the element off of the stack of
		 * open elements, which is the same for the Otherwise condition, there's no need to
		 * separate these checks. The difference comes when a parser operates with the scripting
		 * flag enabled, and executes the script, which this parser does not support.
		 */
		if ( $this->state->current_token->has_self_closing_flag ) {
			$this->state->stack_of_open_elements->pop();
		}
		return true;
	}

	/*
	 * > An end tag whose name is "script", if the current node is an SVG script element.
	 *
	 * The rule is conditioned on the current node of the stack of open elements,
	 * not on the closing-tag token, so the open node is what gets inspected here.
	 * A closing SCRIPT tag that doesn't match falls through to "any other end tag".
	 */
	if ( 'SCRIPT' === $this->state->current_token->node_name ) {
		$open_node = $this->state->stack_of_open_elements->current_node();
		if (
			null !== $open_node &&
			'SCRIPT' === $open_node->node_name &&
			'svg' === $open_node->namespace
		) {
			$this->state->stack_of_open_elements->pop();
			return true;
		}
	}

	/*
	 * > Any other end tag
	 *
	 * Every token reaching this point is a closing tag; the conditional remains
	 * because the `goto` targets below live inside of this block.
	 */
	if ( $is_closer ) {
		$node = $this->state->stack_of_open_elements->current_node();

		// @todo Indicate a parse error when the node's name differs from the token's, once it's possible.

		in_foreign_content_end_tag_loop:
		if ( $node === $this->state->stack_of_open_elements->at( 1 ) ) {
			return true;
		}

		/*
		 * > If node's tag name, converted to ASCII lowercase, is the same as the tag name
		 * > of the token, pop elements from the stack of open elements until node has
		 * > been popped from the stack, and then return.
		 */
		if ( 0 === strcasecmp( $node->node_name, $tag_name ) ) {
			foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
				$this->state->stack_of_open_elements->pop();
				if ( $node === $item ) {
					return true;
				}
			}
		}

		// Move to the first item yielded when walking up the stack from the node.
		foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $item ) {
			$node = $item;
			break;
		}

		if ( 'html' !== $node->namespace ) {
			goto in_foreign_content_end_tag_loop;
		}

		in_foreign_content_process_in_current_insertion_mode:
		switch ( $this->state->insertion_mode ) {
			case WP_HTML_Processor_State::INSERTION_MODE_INITIAL:
				return $this->step_initial();

			case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HTML:
				return $this->step_before_html();

			case WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD:
				return $this->step_before_head();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD:
				return $this->step_in_head();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD_NOSCRIPT:
				return $this->step_in_head_noscript();

			case WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD:
				return $this->step_after_head();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_BODY:
				return $this->step_in_body();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE:
				return $this->step_in_table();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_TEXT:
				return $this->step_in_table_text();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION:
				return $this->step_in_caption();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP:
				return $this->step_in_column_group();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY:
				return $this->step_in_table_body();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_ROW:
				return $this->step_in_row();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_CELL:
				return $this->step_in_cell();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT:
				return $this->step_in_select();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE:
				return $this->step_in_select_in_table();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE:
				return $this->step_in_template();

			case WP_HTML_Processor_State::INSERTION_MODE_AFTER_BODY:
				return $this->step_after_body();

			case WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET:
				return $this->step_in_frameset();

			case WP_HTML_Processor_State::INSERTION_MODE_AFTER_FRAMESET:
				return $this->step_after_frameset();

			case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_BODY:
				return $this->step_after_after_body();

			case WP_HTML_Processor_State::INSERTION_MODE_AFTER_AFTER_FRAMESET:
				return $this->step_after_after_frameset();

			// This should be unreachable but PHP doesn't have total type checking on switch.
			default:
				$this->bail( "Unaware of the requested parsing mode: '{$this->state->insertion_mode}'." );
		}
	}

	$this->bail( 'Should not have been able to reach end of IN FOREIGN CONTENT processing. Check HTML API code.' );
	// This unnecessary return prevents tools from inaccurately reporting type errors.
	return false;
}
1450 |
5070 |
1451 /* |
5071 /* |
1452 * Internal helpers |
5072 * Internal helpers |
1453 */ |
5073 */ |
1474 /* |
5094 /* |
1475 * HTML semantic overrides for Tag Processor |
5095 * HTML semantic overrides for Tag Processor |
1476 */ |
5096 */ |
1477 |
5097 |
1478 /** |
5098 /** |
5099 * Indicates the namespace of the current token, or "html" if there is none. |
|
5100 * |
|
5101 * @return string One of "html", "math", or "svg". |
|
5102 */ |
|
public function get_namespace(): string {
	// With a current element, its token's namespace is reported; otherwise defer to the parent.
	return isset( $this->current_element )
		? $this->current_element->token->namespace
		: parent::get_namespace();
}
|
5110 |
|
5111 /** |
|
1479 * Returns the uppercase name of the matched tag. |
5112 * Returns the uppercase name of the matched tag. |
1480 * |
5113 * |
1481 * The semantic rules for HTML specify that certain tags be reprocessed |
5114 * The semantic rules for HTML specify that certain tags be reprocessed |
1482 * with a different tag name. Because of this, the tag name presented |
5115 * with a different tag name. Because of this, the tag name presented |
1483 * by the HTML Processor may differ from the one reported by the HTML |
5116 * by the HTML Processor may differ from the one reported by the HTML |
1494 * |
5127 * |
1495 * @since 6.4.0 |
5128 * @since 6.4.0 |
1496 * |
5129 * |
1497 * @return string|null Name of currently matched tag in input HTML, or `null` if none found. |
5130 * @return string|null Name of currently matched tag in input HTML, or `null` if none found. |
1498 */ |
5131 */ |
public function get_tag(): ?string {
	// Once an error has been recorded no tag is reported.
	if ( null !== $this->last_error ) {
		return null;
	}

	// Virtual tokens take their name from the token attached to the current element.
	if ( $this->is_virtual() ) {
		return $this->current_element->token->node_name;
	}

	$parsed_name = parent::get_tag();

	/*
	 * > A start tag whose tag name is "image"
	 * > Change the token's tag name to "img" and reprocess it. (Don't ask.)
	 *
	 * The rename only happens for tokens in the HTML namespace.
	 */
	if ( 'IMAGE' === $parsed_name && 'html' === $this->get_namespace() ) {
		return 'IMG';
	}

	return $parsed_name;
}
1522 |
5151 |
1523 /** |
5152 /** |
1524 * Indicates if the currently matched tag contains the self-closing flag. |
5153 * Indicates if the currently matched tag contains the self-closing flag. |
1525 * |
5154 * |
1535 * |
5164 * |
1536 * @since 6.6.0 Subclassed for the HTML Processor. |
5165 * @since 6.6.0 Subclassed for the HTML Processor. |
1537 * |
5166 * |
1538 * @return bool Whether the currently matched tag contains the self-closing flag. |
5167 * @return bool Whether the currently matched tag contains the self-closing flag. |
1539 */ |
5168 */ |
public function has_self_closing_flag(): bool {
	// Virtual tokens never report a self-closing flag.
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::has_self_closing_flag();
}
1543 |
5172 |
1544 /** |
5173 /** |
1545 * Returns the node name represented by the token. |
5174 * Returns the node name represented by the token. |
1559 * |
5188 * |
1560 * @since 6.6.0 Subclassed for the HTML Processor. |
5189 * @since 6.6.0 Subclassed for the HTML Processor. |
1561 * |
5190 * |
1562 * @return string|null Name of the matched token. |
5191 * @return string|null Name of the matched token. |
1563 */ |
5192 */ |
public function get_token_name(): ?string {
	// Virtual tokens are named by the token attached to the current element.
	if ( $this->is_virtual() ) {
		return $this->current_element->token->node_name;
	}

	return parent::get_token_name();
}
1569 |
5198 |
1587 * |
5216 * |
1588 * @since 6.6.0 Subclassed for the HTML Processor. |
5217 * @since 6.6.0 Subclassed for the HTML Processor. |
1589 * |
5218 * |
1590 * @return string|null What kind of token is matched, or null. |
5219 * @return string|null What kind of token is matched, or null. |
1591 */ |
5220 */ |
1592 public function get_token_type() { |
5221 public function get_token_type(): ?string { |
1593 if ( $this->is_virtual() ) { |
5222 if ( $this->is_virtual() ) { |
1594 /* |
5223 /* |
1595 * This logic comes from the Tag Processor. |
5224 * This logic comes from the Tag Processor. |
1596 * |
5225 * |
1597 * @todo It would be ideal not to repeat this here, but it's not clearly |
5226 * @todo It would be ideal not to repeat this here, but it's not clearly |
1649 * |
5278 * |
1650 * @param string $name The attribute name to target. |
5279 * @param string $name The attribute name to target. |
1651 * @param string|bool $value The new attribute value. |
5280 * @param string|bool $value The new attribute value. |
1652 * @return bool Whether an attribute value was set. |
5281 * @return bool Whether an attribute value was set. |
1653 */ |
5282 */ |
public function set_attribute( $name, $value ): bool {
	// Attributes cannot be set on virtual tokens.
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::set_attribute( $name, $value );
}
1657 |
5286 |
1658 /** |
5287 /** |
1659 * Remove an attribute from the currently-matched tag. |
5288 * Remove an attribute from the currently-matched tag. |
1661 * @since 6.6.0 Subclassed for HTML Processor. |
5290 * @since 6.6.0 Subclassed for HTML Processor. |
1662 * |
5291 * |
1663 * @param string $name The attribute name to remove. |
5292 * @param string $name The attribute name to remove. |
1664 * @return bool Whether an attribute was removed. |
5293 * @return bool Whether an attribute was removed. |
1665 */ |
5294 */ |
public function remove_attribute( $name ): bool {
	// Nothing is removed from virtual tokens.
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::remove_attribute( $name );
}
1669 |
5298 |
1670 /** |
5299 /** |
1671 * Gets lowercase names of all attributes matching a given prefix in the current tag. |
5300 * Gets lowercase names of all attributes matching a given prefix in the current tag. |
1691 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive |
5320 * @see https://html.spec.whatwg.org/multipage/syntax.html#attributes-2:ascii-case-insensitive |
1692 * |
5321 * |
1693 * @param string $prefix Prefix of requested attribute names. |
5322 * @param string $prefix Prefix of requested attribute names. |
1694 * @return array|null List of attribute names, or `null` when no tag opener is matched. |
5323 * @return array|null List of attribute names, or `null` when no tag opener is matched. |
1695 */ |
5324 */ |
public function get_attribute_names_with_prefix( $prefix ): ?array {
	// Virtual tokens report `null` rather than a list of names.
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::get_attribute_names_with_prefix( $prefix );
}
1699 |
5328 |
1700 /** |
5329 /** |
1701 * Adds a new class name to the currently matched tag. |
5330 * Adds a new class name to the currently matched tag. |
1703 * @since 6.6.0 Subclassed for the HTML Processor. |
5332 * @since 6.6.0 Subclassed for the HTML Processor. |
1704 * |
5333 * |
1705 * @param string $class_name The class name to add. |
5334 * @param string $class_name The class name to add. |
1706 * @return bool Whether the class was set to be added. |
5335 * @return bool Whether the class was set to be added. |
1707 */ |
5336 */ |
public function add_class( $class_name ): bool {
	// Classes cannot be added to virtual tokens.
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::add_class( $class_name );
}
1711 |
5340 |
1712 /** |
5341 /** |
1713 * Removes a class name from the currently matched tag. |
5342 * Removes a class name from the currently matched tag. |
1715 * @since 6.6.0 Subclassed for the HTML Processor. |
5344 * @since 6.6.0 Subclassed for the HTML Processor. |
1716 * |
5345 * |
1717 * @param string $class_name The class name to remove. |
5346 * @param string $class_name The class name to remove. |
1718 * @return bool Whether the class was set to be removed. |
5347 * @return bool Whether the class was set to be removed. |
1719 */ |
5348 */ |
public function remove_class( $class_name ): bool {
	// Classes cannot be removed from virtual tokens.
	if ( $this->is_virtual() ) {
		return false;
	}

	return parent::remove_class( $class_name );
}
1723 |
5352 |
1724 /** |
5353 /** |
1725 * Returns if a matched tag contains the given ASCII case-insensitive class name. |
5354 * Returns if a matched tag contains the given ASCII case-insensitive class name. |
1726 * |
5355 * |
1727 * @since 6.6.0 Subclassed for the HTML Processor. |
5356 * @since 6.6.0 Subclassed for the HTML Processor. |
5357 * |
|
5358 * @todo When reconstructing active formatting elements with attributes, find a way |
|
5359 * to indicate if the virtually-reconstructed formatting elements contain the |
|
5360 * wanted class name. |
|
1728 * |
5361 * |
1729 * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. |
5362 * @param string $wanted_class Look for this CSS class name, ASCII case-insensitive. |
1730 * @return bool|null Whether the matched tag contains the given class name, or null if not matched. |
5363 * @return bool|null Whether the matched tag contains the given class name, or null if not matched. |
1731 */ |
5364 */ |
public function has_class( $wanted_class ): ?bool {
	// Virtual tokens report `null`; see the @todo on reconstructed formatting elements.
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::has_class( $wanted_class );
}
1735 |
5368 |
1736 /** |
5369 /** |
1737 * Generator for a foreach loop to step through each class name for the matched tag. |
5370 * Generator for a foreach loop to step through each class name for the matched tag. |
1771 * |
5404 * |
1772 * @since 6.6.0 Subclassed for the HTML Processor. |
5405 * @since 6.6.0 Subclassed for the HTML Processor. |
1773 * |
5406 * |
1774 * @return string |
5407 * @return string |
1775 */ |
5408 */ |
public function get_modifiable_text(): string {
	// Virtual tokens report an empty string.
	if ( $this->is_virtual() ) {
		return '';
	}

	return parent::get_modifiable_text();
}
1779 |
5412 |
1780 /** |
5413 /** |
1781 * Indicates what kind of comment produced the comment node. |
5414 * Indicates what kind of comment produced the comment node. |
1794 * |
5427 * |
1795 * @since 6.6.0 Subclassed for the HTML Processor. |
5428 * @since 6.6.0 Subclassed for the HTML Processor. |
1796 * |
5429 * |
1797 * @return string|null |
5430 * @return string|null |
1798 */ |
5431 */ |
public function get_comment_type(): ?string {
	// Virtual tokens report no comment type.
	if ( $this->is_virtual() ) {
		return null;
	}

	return parent::get_comment_type();
}
1802 |
5435 |
1803 /** |
5436 /** |
1804 * Removes a bookmark that is no longer needed. |
5437 * Removes a bookmark that is no longer needed. |
1809 * @since 6.4.0 |
5442 * @since 6.4.0 |
1810 * |
5443 * |
1811 * @param string $bookmark_name Name of the bookmark to remove. |
5444 * @param string $bookmark_name Name of the bookmark to remove. |
1812 * @return bool Whether the bookmark already existed before removal. |
5445 * @return bool Whether the bookmark already existed before removal. |
1813 */ |
5446 */ |
public function release_bookmark( $bookmark_name ): bool {
	// The parent is given the bookmark name with an underscore prefix.
	$prefixed_name = '_' . $bookmark_name;

	return parent::release_bookmark( $prefixed_name );
}
1817 |
5450 |
1818 /** |
5451 /** |
1819 * Moves the internal cursor in the HTML Processor to a given bookmark's location. |
5452 * Moves the internal cursor in the HTML Processor to a given bookmark's location. |
1830 * @since 6.4.0 |
5463 * @since 6.4.0 |
1831 * |
5464 * |
1832 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. |
5465 * @param string $bookmark_name Jump to the place in the document identified by this bookmark name. |
1833 * @return bool Whether the internal cursor was successfully moved to the bookmark's location. |
5466 * @return bool Whether the internal cursor was successfully moved to the bookmark's location. |
1834 */ |
5467 */ |
public function seek( $bookmark_name ): bool {
	// Flush any pending updates to the document before beginning.
	$this->get_updated_html();

	$actual_bookmark_name = "_{$bookmark_name}";

	/*
	 * A bookmark which was never set, or which has been released, cannot be
	 * sought. Reporting failure here avoids reading properties of a missing
	 * entry in the bookmarks list.
	 */
	if ( ! isset( $this->bookmarks[ $actual_bookmark_name ] ) ) {
		return false;
	}

	$processor_started_at = $this->state->current_token
		? $this->bookmarks[ $this->state->current_token->bookmark_name ]->start
		: 0;
	$bookmark_starts_at   = $this->bookmarks[ $actual_bookmark_name ]->start;
	$direction            = $bookmark_starts_at > $processor_started_at ? 'forward' : 'backward';

	/*
	 * If seeking backwards, it's possible that the sought-after bookmark exists within an element
	 * which has been closed before the current cursor. In that case the parser state is reset
	 * and the document is parsed again, either from the very start for a full parse or from
	 * the context node for a fragment parse, until the bookmarked location is reached.
	 */
	if ( 'backward' === $direction ) {

		/*
		 * When moving backward, stateful stacks should be cleared.
		 */
		foreach ( $this->state->stack_of_open_elements->walk_up() as $item ) {
			$this->state->stack_of_open_elements->remove_node( $item );
		}

		foreach ( $this->state->active_formatting_elements->walk_up() as $item ) {
			$this->state->active_formatting_elements->remove_node( $item );
		}

		/*
		 * **After** clearing stacks, more processor state can be reset.
		 * This must be done after clearing the stack because those stacks generate events that
		 * would appear on a subsequent call to `next_token()`.
		 */
		$this->state->frameset_ok                       = true;
		$this->state->stack_of_template_insertion_modes = array();
		$this->state->head_element                      = null;
		$this->state->form_element                      = null;
		$this->state->current_token                     = null;
		$this->current_element                          = null;
		$this->element_queue                            = array();

		/*
		 * The absence of a context node indicates a full parse.
		 * The presence of a context node indicates a fragment parser.
		 */
		if ( null === $this->context_node ) {
			$this->change_parsing_namespace( 'html' );
			$this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_INITIAL;
			$this->breadcrumbs           = array();

			// A temporary zero-length bookmark at the document start gives the parent a place to seek to.
			$this->bookmarks['initial'] = new WP_HTML_Span( 0, 0 );
			parent::seek( 'initial' );
			unset( $this->bookmarks['initial'] );
		} else {

			/*
			 * Push the root-node (HTML) back onto the stack of open elements.
			 *
			 * Fragment parsers require this extra bit of setup.
			 * It's handled in full parsers by advancing the processor state.
			 */
			$this->state->stack_of_open_elements->push(
				new WP_HTML_Token(
					'root-node',
					'HTML',
					false
				)
			);

			$this->change_parsing_namespace(
				$this->context_node->integration_node_type
					? 'html'
					: $this->context_node->namespace
			);

			if ( 'TEMPLATE' === $this->context_node->node_name ) {
				$this->state->stack_of_template_insertion_modes[] = WP_HTML_Processor_State::INSERTION_MODE_IN_TEMPLATE;
			}

			$this->reset_insertion_mode_appropriately();
			$this->breadcrumbs = array_slice( $this->breadcrumbs, 0, 2 );
			parent::seek( $this->context_node->bookmark_name );
		}
	}

	/*
	 * Here, the processor moves forward through the document until it matches the bookmark.
	 * do-while is used here because the processor is expected to already be stopped on
	 * a token that may match the bookmarked location.
	 */
	do {
		/*
		 * The processor will stop on virtual tokens, but bookmarks may not be set on them.
		 * They should not be matched when seeking a bookmark, skip them.
		 */
		if ( $this->is_virtual() ) {
			continue;
		}

		/*
		 * There may be no current token yet, e.g. when the processor has not
		 * stopped on anything. There's nothing to compare in that case.
		 */
		$current_token = $this->state->current_token;
		if ( null === $current_token ) {
			continue;
		}

		if ( $bookmark_starts_at === $this->bookmarks[ $current_token->bookmark_name ]->start ) {
			return true;
		}
	} while ( $this->next_token() );

	return false;
}
1920 |
5598 |
1921 /** |
5599 /** |
1991 * Bookmarks are a powerful tool to enable complicated behavior. |
5669 * Bookmarks are a powerful tool to enable complicated behavior. |
1992 * Consider double-checking that you need this tool if you are |
5670 * Consider double-checking that you need this tool if you are |
1993 * reaching for it, as inappropriate use could lead to broken |
5671 * reaching for it, as inappropriate use could lead to broken |
1994 * HTML structure or unwanted processing overhead. |
5672 * HTML structure or unwanted processing overhead. |
1995 * |
5673 * |
5674 * Bookmarks cannot be set on tokens that do not appear in the original |
|
5675 * HTML text. For example, the HTML `<table><td>` stops at tags `TABLE`, |
|
5676 * `TBODY`, `TR`, and `TD`. The `TBODY` and `TR` tags do not appear in |
|
5677 * the original HTML and cannot be used as bookmarks. |
|
5678 * |
|
1996 * @since 6.4.0 |
5679 * @since 6.4.0 |
1997 * |
5680 * |
1998 * @param string $bookmark_name Identifies this particular bookmark. |
5681 * @param string $bookmark_name Identifies this particular bookmark. |
1999 * @return bool Whether the bookmark was successfully created. |
5682 * @return bool Whether the bookmark was successfully created. |
2000 */ |
5683 */ |
public function set_bookmark( $bookmark_name ): bool {
	/*
	 * Virtual tokens have no location in the original HTML text
	 * to point at, so a bookmark cannot be created for them.
	 */
	if ( $this->is_virtual() ) {
		_doing_it_wrong(
			__METHOD__,
			__( 'Cannot set bookmarks on tokens that do not appear in the original HTML text.' ),
			'6.8.0'
		);
		return false;
	}

	// The parent is given the bookmark name with an underscore prefix.
	return parent::set_bookmark( "_{$bookmark_name}" );
}
2004 |
5695 |
2005 /** |
5696 /** |
2006 * Checks whether a bookmark with the given name exists. |
5697 * Checks whether a bookmark with the given name exists. |
2008 * @since 6.5.0 |
5699 * @since 6.5.0 |
2009 * |
5700 * |
2010 * @param string $bookmark_name Name to identify a bookmark that potentially exists. |
5701 * @param string $bookmark_name Name to identify a bookmark that potentially exists. |
2011 * @return bool Whether that bookmark exists. |
5702 * @return bool Whether that bookmark exists. |
2012 */ |
5703 */ |
2013 public function has_bookmark( $bookmark_name ) { |
5704 public function has_bookmark( $bookmark_name ): bool { |
2014 return parent::has_bookmark( "_{$bookmark_name}" ); |
5705 return parent::has_bookmark( "_{$bookmark_name}" ); |
2015 } |
5706 } |
2016 |
5707 |
2017 /* |
5708 /* |
2018 * HTML Parsing Algorithms |
5709 * HTML Parsing Algorithms |
2025 * |
5716 * |
2026 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
5717 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
2027 * |
5718 * |
2028 * @see https://html.spec.whatwg.org/#close-a-p-element |
5719 * @see https://html.spec.whatwg.org/#close-a-p-element |
2029 */ |
5720 */ |
2030 private function close_a_p_element() { |
5721 private function close_a_p_element(): void { |
2031 $this->generate_implied_end_tags( 'P' ); |
5722 $this->generate_implied_end_tags( 'P' ); |
2032 $this->state->stack_of_open_elements->pop_until( 'P' ); |
5723 $this->state->stack_of_open_elements->pop_until( 'P' ); |
2033 } |
5724 } |
2034 |
5725 |
2035 /** |
5726 /** |
2036 * Closes elements that have implied end tags. |
5727 * Closes elements that have implied end tags. |
2037 * |
5728 * |
2038 * @since 6.4.0 |
5729 * @since 6.4.0 |
5730 * @since 6.7.0 Full spec support. |
|
2039 * |
5731 * |
2040 * @see https://html.spec.whatwg.org/#generate-implied-end-tags |
5732 * @see https://html.spec.whatwg.org/#generate-implied-end-tags |
2041 * |
5733 * |
2042 * @param string|null $except_for_this_element Perform as if this element doesn't exist in the stack of open elements. |
5734 * @param string|null $except_for_this_element Perform as if this element doesn't exist in the stack of open elements. |
2043 */ |
5735 */ |
2044 private function generate_implied_end_tags( $except_for_this_element = null ) { |
5736 private function generate_implied_end_tags( ?string $except_for_this_element = null ): void { |
2045 $elements_with_implied_end_tags = array( |
5737 $elements_with_implied_end_tags = array( |
2046 'DD', |
5738 'DD', |
2047 'DT', |
5739 'DT', |
2048 'LI', |
5740 'LI', |
5741 'OPTGROUP', |
|
5742 'OPTION', |
|
2049 'P', |
5743 'P', |
5744 'RB', |
|
5745 'RP', |
|
5746 'RT', |
|
5747 'RTC', |
|
2050 ); |
5748 ); |
2051 |
5749 |
2052 $current_node = $this->state->stack_of_open_elements->current_node(); |
5750 $no_exclusions = ! isset( $except_for_this_element ); |
5751 |
|
2053 while ( |
5752 while ( |
2054 $current_node && $current_node->node_name !== $except_for_this_element && |
5753 ( $no_exclusions || ! $this->state->stack_of_open_elements->current_node_is( $except_for_this_element ) ) && |
2055 in_array( $this->state->stack_of_open_elements->current_node(), $elements_with_implied_end_tags, true ) |
5754 in_array( $this->state->stack_of_open_elements->current_node()->node_name, $elements_with_implied_end_tags, true ) |
2056 ) { |
5755 ) { |
2057 $this->state->stack_of_open_elements->pop(); |
5756 $this->state->stack_of_open_elements->pop(); |
2058 } |
5757 } |
2059 } |
5758 } |
2060 |
5759 |
2063 * |
5762 * |
2064 * See the HTML specification for an explanation why this is |
5763 * See the HTML specification for an explanation why this is |
2065 * different from generating end tags in the normal sense. |
5764 * different from generating end tags in the normal sense. |
2066 * |
5765 * |
2067 * @since 6.4.0 |
5766 * @since 6.4.0 |
5767 * @since 6.7.0 Full spec support. |
|
2068 * |
5768 * |
2069 * @see WP_HTML_Processor::generate_implied_end_tags |
5769 * @see WP_HTML_Processor::generate_implied_end_tags |
2070 * @see https://html.spec.whatwg.org/#generate-implied-end-tags |
5770 * @see https://html.spec.whatwg.org/#generate-implied-end-tags |
2071 */ |
5771 */ |
2072 private function generate_implied_end_tags_thoroughly() { |
5772 private function generate_implied_end_tags_thoroughly(): void { |
2073 $elements_with_implied_end_tags = array( |
5773 $elements_with_implied_end_tags = array( |
5774 'CAPTION', |
|
5775 'COLGROUP', |
|
2074 'DD', |
5776 'DD', |
2075 'DT', |
5777 'DT', |
2076 'LI', |
5778 'LI', |
5779 'OPTGROUP', |
|
5780 'OPTION', |
|
2077 'P', |
5781 'P', |
5782 'RB', |
|
5783 'RP', |
|
5784 'RT', |
|
5785 'RTC', |
|
5786 'TBODY', |
|
5787 'TD', |
|
5788 'TFOOT', |
|
5789 'TH', |
|
5790 'THEAD', |
|
5791 'TR', |
|
2078 ); |
5792 ); |
2079 |
5793 |
2080 while ( in_array( $this->state->stack_of_open_elements->current_node(), $elements_with_implied_end_tags, true ) ) { |
5794 while ( in_array( $this->state->stack_of_open_elements->current_node()->node_name, $elements_with_implied_end_tags, true ) ) { |
2081 $this->state->stack_of_open_elements->pop(); |
5795 $this->state->stack_of_open_elements->pop(); |
2082 } |
5796 } |
5797 } |
|
5798 |
|
5799 /** |
|
5800 * Returns the adjusted current node. |
|
5801 * |
|
5802 * > The adjusted current node is the context element if the parser was created as |
|
5803 * > part of the HTML fragment parsing algorithm and the stack of open elements |
|
5804 * > has only one element in it (fragment case); otherwise, the adjusted current |
|
5805 * > node is the current node. |
|
5806 * |
|
5807 * @see https://html.spec.whatwg.org/#adjusted-current-node |
|
5808 * |
|
5809 * @since 6.7.0 |
|
5810 * |
|
5811 * @return WP_HTML_Token|null The adjusted current node. |
|
5812 */ |
|
5813 private function get_adjusted_current_node(): ?WP_HTML_Token { |
|
5814 if ( isset( $this->context_node ) && 1 === $this->state->stack_of_open_elements->count() ) { |
|
5815 return $this->context_node; |
|
5816 } |
|
5817 |
|
5818 return $this->state->stack_of_open_elements->current_node(); |
|
2083 } |
5819 } |
2084 |
5820 |
2085 /** |
5821 /** |
2086 * Reconstructs the active formatting elements. |
5822 * Reconstructs the active formatting elements. |
2087 * |
5823 * |
2095 * |
5831 * |
2096 * @see https://html.spec.whatwg.org/#reconstruct-the-active-formatting-elements |
5832 * @see https://html.spec.whatwg.org/#reconstruct-the-active-formatting-elements |
2097 * |
5833 * |
2098 * @return bool Whether any formatting elements needed to be reconstructed. |
5834 * @return bool Whether any formatting elements needed to be reconstructed. |
2099 */ |
5835 */ |
2100 private function reconstruct_active_formatting_elements() { |
5836 private function reconstruct_active_formatting_elements(): bool { |
2101 /* |
5837 /* |
2102 * > If there are no entries in the list of active formatting elements, then there is nothing |
5838 * > If there are no entries in the list of active formatting elements, then there is nothing |
2103 * > to reconstruct; stop this algorithm. |
5839 * > to reconstruct; stop this algorithm. |
2104 */ |
5840 */ |
2105 if ( 0 === $this->state->active_formatting_elements->count() ) { |
5841 if ( 0 === $this->state->active_formatting_elements->count() ) { |
2123 $this->state->stack_of_open_elements->contains_node( $last_entry ) |
5859 $this->state->stack_of_open_elements->contains_node( $last_entry ) |
2124 ) { |
5860 ) { |
2125 return false; |
5861 return false; |
2126 } |
5862 } |
2127 |
5863 |
2128 $this->last_error = self::ERROR_UNSUPPORTED; |
5864 $this->bail( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); |
2129 throw new WP_HTML_Unsupported_Exception( 'Cannot reconstruct active formatting elements when advancing and rewinding is required.' ); |
5865 } |
5866 |
|
5867 /** |
|
5868 * Runs the "reset the insertion mode appropriately" algorithm. |
|
5869 * |
|
5870 * @since 6.7.0 |
|
5871 * |
|
5872 * @see https://html.spec.whatwg.org/multipage/parsing.html#reset-the-insertion-mode-appropriately |
|
5873 */ |
|
5874 private function reset_insertion_mode_appropriately(): void { |
|
5875 // Set the first node. |
|
5876 $first_node = null; |
|
5877 foreach ( $this->state->stack_of_open_elements->walk_down() as $first_node ) { |
|
5878 break; |
|
5879 } |
|
5880 |
|
5881 /* |
|
5882 * > 1. Let _last_ be false. |
|
5883 */ |
|
5884 $last = false; |
|
5885 foreach ( $this->state->stack_of_open_elements->walk_up() as $node ) { |
|
5886 /* |
|
5887 * > 2. Let _node_ be the last node in the stack of open elements. |
|
5888 * > 3. _Loop_: If _node_ is the first node in the stack of open elements, then set _last_ |
|
5889 * > to true, and, if the parser was created as part of the HTML fragment parsing |
|
5890 * > algorithm (fragment case), set node to the context element passed to |
|
5891 * > that algorithm. |
|
5892 * > … |
|
5893 */ |
|
5894 if ( $node === $first_node ) { |
|
5895 $last = true; |
|
5896 if ( isset( $this->context_node ) ) { |
|
5897 $node = $this->context_node; |
|
5898 } |
|
5899 } |
|
5900 |
|
5901 // All of the following rules are for matching HTML elements. |
|
5902 if ( 'html' !== $node->namespace ) { |
|
5903 continue; |
|
5904 } |
|
5905 |
|
5906 switch ( $node->node_name ) { |
|
5907 /* |
|
5908 * > 4. If node is a `select` element, run these substeps: |
|
5909 * > 1. If _last_ is true, jump to the step below labeled done. |
|
5910 * > 2. Let _ancestor_ be _node_. |
|
5911 * > 3. _Loop_: If _ancestor_ is the first node in the stack of open elements, |
|
5912 * > jump to the step below labeled done. |
|
5913 * > 4. Let ancestor be the node before ancestor in the stack of open elements. |
|
5914 * > … |
|
5915 * > 7. Jump back to the step labeled _loop_. |
|
5916 * > 8. _Done_: Switch the insertion mode to "in select" and return. |
|
5917 */ |
|
5918 case 'SELECT': |
|
5919 if ( ! $last ) { |
|
5920 foreach ( $this->state->stack_of_open_elements->walk_up( $node ) as $ancestor ) { |
|
5921 if ( 'html' !== $ancestor->namespace ) { |
|
5922 continue; |
|
5923 } |
|
5924 |
|
5925 switch ( $ancestor->node_name ) { |
|
5926 /* |
|
5927 * > 5. If _ancestor_ is a `template` node, jump to the step below |
|
5928 * > labeled _done_. |
|
5929 */ |
|
5930 case 'TEMPLATE': |
|
5931 break 2; |
|
5932 |
|
5933 /* |
|
5934 * > 6. If _ancestor_ is a `table` node, switch the insertion mode to |
|
5935 * > "in select in table" and return. |
|
5936 */ |
|
5937 case 'TABLE': |
|
5938 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT_IN_TABLE; |
|
5939 return; |
|
5940 } |
|
5941 } |
|
5942 } |
|
5943 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_SELECT; |
|
5944 return; |
|
5945 |
|
5946 /* |
|
5947 * > 5. If _node_ is a `td` or `th` element and _last_ is false, then switch the |
|
5948 * > insertion mode to "in cell" and return. |
|
5949 */ |
|
5950 case 'TD': |
|
5951 case 'TH': |
|
5952 if ( ! $last ) { |
|
5953 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CELL; |
|
5954 return; |
|
5955 } |
|
5956 break; |
|
5957 |
|
5958 /* |
|
5959 * > 6. If _node_ is a `tr` element, then switch the insertion mode to "in row" |
|
5960 * > and return. |
|
5961 */ |
|
5962 case 'TR': |
|
5963 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; |
|
5964 return; |
|
5965 |
|
5966 /* |
|
5967 * > 7. If _node_ is a `tbody`, `thead`, or `tfoot` element, then switch the |
|
5968 * > insertion mode to "in table body" and return. |
|
5969 */ |
|
5970 case 'TBODY': |
|
5971 case 'THEAD': |
|
5972 case 'TFOOT': |
|
5973 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE_BODY; |
|
5974 return; |
|
5975 |
|
5976 /* |
|
5977 * > 8. If _node_ is a `caption` element, then switch the insertion mode to |
|
5978 * > "in caption" and return. |
|
5979 */ |
|
5980 case 'CAPTION': |
|
5981 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_CAPTION; |
|
5982 return; |
|
5983 |
|
5984 /* |
|
5985 * > 9. If _node_ is a `colgroup` element, then switch the insertion mode to |
|
5986 * > "in column group" and return. |
|
5987 */ |
|
5988 case 'COLGROUP': |
|
5989 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_COLUMN_GROUP; |
|
5990 return; |
|
5991 |
|
5992 /* |
|
5993 * > 10. If _node_ is a `table` element, then switch the insertion mode to |
|
5994 * > "in table" and return. |
|
5995 */ |
|
5996 case 'TABLE': |
|
5997 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_TABLE; |
|
5998 return; |
|
5999 |
|
6000 /* |
|
6001 * > 11. If _node_ is a `template` element, then switch the insertion mode to the |
|
6002 * > current template insertion mode and return. |
|
6003 */ |
|
6004 case 'TEMPLATE': |
|
6005 $this->state->insertion_mode = end( $this->state->stack_of_template_insertion_modes ); |
|
6006 return; |
|
6007 |
|
6008 /* |
|
6009 * > 12. If _node_ is a `head` element and _last_ is false, then switch the |
|
6010 * > insertion mode to "in head" and return. |
|
6011 */ |
|
6012 case 'HEAD': |
|
6013 if ( ! $last ) { |
|
6014 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_HEAD; |
|
6015 return; |
|
6016 } |
|
6017 break; |
|
6018 |
|
6019 /* |
|
6020 * > 13. If _node_ is a `body` element, then switch the insertion mode to "in body" |
|
6021 * > and return. |
|
6022 */ |
|
6023 case 'BODY': |
|
6024 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; |
|
6025 return; |
|
6026 |
|
6027 /* |
|
6028 * > 14. If _node_ is a `frameset` element, then switch the insertion mode to |
|
6029 * > "in frameset" and return. (fragment case) |
|
6030 */ |
|
6031 case 'FRAMESET': |
|
6032 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_FRAMESET; |
|
6033 return; |
|
6034 |
|
6035 /* |
|
6036 * > 15. If _node_ is an `html` element, run these substeps: |
|
6037 * > 1. If the head element pointer is null, switch the insertion mode to |
|
6038 * > "before head" and return. (fragment case) |
|
6039 * > 2. Otherwise, the head element pointer is not null, switch the insertion |
|
6040 * > mode to "after head" and return. |
|
6041 */ |
|
6042 case 'HTML': |
|
6043 $this->state->insertion_mode = isset( $this->state->head_element ) |
|
6044 ? WP_HTML_Processor_State::INSERTION_MODE_AFTER_HEAD |
|
6045 : WP_HTML_Processor_State::INSERTION_MODE_BEFORE_HEAD; |
|
6046 return; |
|
6047 } |
|
6048 } |
|
6049 |
|
6050 /* |
|
6051 * > 16. If _last_ is true, then switch the insertion mode to "in body" |
|
6052 * > and return. (fragment case) |
|
6053 * |
|
6054 * This is only reachable if `$last` is true, as per the fragment parsing case. |
|
6055 */ |
|
6056 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_BODY; |
|
2130 } |
6057 } |
2131 |
6058 |
2132 /** |
6059 /** |
2133 * Runs the adoption agency algorithm. |
6060 * Runs the adoption agency algorithm. |
2134 * |
6061 * |
2136 * |
6063 * |
2137 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
6064 * @throws WP_HTML_Unsupported_Exception When encountering unsupported HTML input. |
2138 * |
6065 * |
2139 * @see https://html.spec.whatwg.org/#adoption-agency-algorithm |
6066 * @see https://html.spec.whatwg.org/#adoption-agency-algorithm |
2140 */ |
6067 */ |
2141 private function run_adoption_agency_algorithm() { |
6068 private function run_adoption_agency_algorithm(): void { |
2142 $budget = 1000; |
6069 $budget = 1000; |
2143 $subject = $this->get_tag(); |
6070 $subject = $this->get_tag(); |
2144 $current_node = $this->state->stack_of_open_elements->current_node(); |
6071 $current_node = $this->state->stack_of_open_elements->current_node(); |
2145 |
6072 |
2146 if ( |
6073 if ( |
2177 } |
6104 } |
2178 } |
6105 } |
2179 |
6106 |
2180 // > If there is no such element, then return and instead act as described in the "any other end tag" entry above. |
6107 // > If there is no such element, then return and instead act as described in the "any other end tag" entry above. |
2181 if ( null === $formatting_element ) { |
6108 if ( null === $formatting_element ) { |
2182 $this->last_error = self::ERROR_UNSUPPORTED; |
6109 $this->bail( 'Cannot run adoption agency when "any other end tag" is required.' ); |
2183 throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when "any other end tag" is required.' ); |
|
2184 } |
6110 } |
2185 |
6111 |
2186 // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. |
6112 // > If formatting element is not in the stack of open elements, then this is a parse error; remove the element from the list, and return. |
2187 if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { |
6113 if ( ! $this->state->stack_of_open_elements->contains_node( $formatting_element ) ) { |
2188 $this->state->active_formatting_elements->remove_node( $formatting_element ); |
6114 $this->state->active_formatting_elements->remove_node( $formatting_element ); |
2208 if ( $is_above_formatting_element ) { |
6134 if ( $is_above_formatting_element ) { |
2209 $is_above_formatting_element = false; |
6135 $is_above_formatting_element = false; |
2210 continue; |
6136 continue; |
2211 } |
6137 } |
2212 |
6138 |
2213 if ( self::is_special( $item->node_name ) ) { |
6139 if ( self::is_special( $item ) ) { |
2214 $furthest_block = $item; |
6140 $furthest_block = $item; |
2215 break; |
6141 break; |
2216 } |
6142 } |
2217 } |
6143 } |
2218 |
6144 |
2230 return; |
6156 return; |
2231 } |
6157 } |
2232 } |
6158 } |
2233 } |
6159 } |
2234 |
6160 |
2235 $this->last_error = self::ERROR_UNSUPPORTED; |
6161 $this->bail( 'Cannot extract common ancestor in adoption agency algorithm.' ); |
2236 throw new WP_HTML_Unsupported_Exception( 'Cannot extract common ancestor in adoption agency algorithm.' ); |
6162 } |
2237 } |
6163 |
2238 |
6164 $this->bail( 'Cannot run adoption agency when looping required.' ); |
2239 $this->last_error = self::ERROR_UNSUPPORTED; |
6165 } |
2240 throw new WP_HTML_Unsupported_Exception( 'Cannot run adoption agency when looping required.' ); |
6166 |
6167 /** |
|
6168 * Runs the "close the cell" algorithm. |
|
6169 * |
|
6170 * > Where the steps above say to close the cell, they mean to run the following algorithm: |
|
6171 * > 1. Generate implied end tags. |
|
6172 * > 2. If the current node is not now a td element or a th element, then this is a parse error. |
|
6173 * > 3. Pop elements from the stack of open elements until a td element or a th element has been popped from the stack. |
|
6174 * > 4. Clear the list of active formatting elements up to the last marker. |
|
6175 * > 5. Switch the insertion mode to "in row". |
|
6176 * |
|
6177 * @see https://html.spec.whatwg.org/multipage/parsing.html#close-the-cell |
|
6178 * |
|
6179 * @since 6.7.0 |
|
6180 */ |
|
6181 private function close_cell(): void { |
|
6182 $this->generate_implied_end_tags(); |
|
6183 // @todo Parse error if the current node is not a "td" or "th" element. |
|
6184 foreach ( $this->state->stack_of_open_elements->walk_up() as $element ) { |
|
6185 $this->state->stack_of_open_elements->pop(); |
|
6186 if ( 'TD' === $element->node_name || 'TH' === $element->node_name ) { |
|
6187 break; |
|
6188 } |
|
6189 } |
|
6190 $this->state->active_formatting_elements->clear_up_to_last_marker(); |
|
6191 $this->state->insertion_mode = WP_HTML_Processor_State::INSERTION_MODE_IN_ROW; |
|
2241 } |
6192 } |
2242 |
6193 |
2243 /** |
6194 /** |
2244 * Inserts an HTML element on the stack of open elements. |
6195 * Inserts an HTML element on the stack of open elements. |
2245 * |
6196 * |
2247 * |
6198 * |
2248 * @see https://html.spec.whatwg.org/#insert-a-foreign-element |
6199 * @see https://html.spec.whatwg.org/#insert-a-foreign-element |
2249 * |
6200 * |
2250 * @param WP_HTML_Token $token Name of bookmark pointing to element in original input HTML. |
6201 * @param WP_HTML_Token $token Name of bookmark pointing to element in original input HTML. |
2251 */ |
6202 */ |
2252 private function insert_html_element( $token ) { |
6203 private function insert_html_element( WP_HTML_Token $token ): void { |
2253 $this->state->stack_of_open_elements->push( $token ); |
6204 $this->state->stack_of_open_elements->push( $token ); |
6205 } |
|
6206 |
|
6207 /** |
|
6208 * Inserts a foreign element on to the stack of open elements. |
|
6209 * |
|
6210 * @since 6.7.0 |
|
6211 * |
|
6212 * @see https://html.spec.whatwg.org/#insert-a-foreign-element |
|
6213 * |
|
6214 * @param WP_HTML_Token $token Insert this token. The token's namespace and |
|
6215 * insertion point will be updated correctly. |
|
6216 * @param bool $only_add_to_element_stack Whether to skip the "insert an element at the adjusted |
|
6217 * insertion location" algorithm when adding this element. |
|
6218 */ |
|
6219 private function insert_foreign_element( WP_HTML_Token $token, bool $only_add_to_element_stack ): void { |
|
6220 $adjusted_current_node = $this->get_adjusted_current_node(); |
|
6221 |
|
6222 $token->namespace = $adjusted_current_node ? $adjusted_current_node->namespace : 'html'; |
|
6223 |
|
6224 if ( $this->is_mathml_integration_point() ) { |
|
6225 $token->integration_node_type = 'math'; |
|
6226 } elseif ( $this->is_html_integration_point() ) { |
|
6227 $token->integration_node_type = 'html'; |
|
6228 } |
|
6229 |
|
6230 if ( false === $only_add_to_element_stack ) { |
|
6231 /* |
|
6232 * @todo Implement the "appropriate place for inserting a node" and the |
|
6233 * "insert an element at the adjusted insertion location" algorithms. |
|
6234 * |
|
6235 * These algorithms mostly impact DOM tree construction and not the HTML API. |
|
6236 * Here, there's no DOM node onto which the element will be appended, so the |
|
6237 * parser will skip this step. |
|
6238 * |
|
6239 * @see https://html.spec.whatwg.org/#insert-an-element-at-the-adjusted-insertion-location |
|
6240 */ |
|
6241 } |
|
6242 |
|
6243 $this->insert_html_element( $token ); |
|
6244 } |
|
6245 |
|
6246 /** |
|
6247 * Inserts a virtual element on the stack of open elements. |
|
6248 * |
|
6249 * @since 6.7.0 |
|
6250 * |
|
6251 * @param string $token_name Name of token to create and insert into the stack of open elements. |
|
6252 * @param string|null $bookmark_name Optional. Name to give bookmark for created virtual node. |
|
6253 * Defaults to auto-creating a bookmark name. |
|
6254 * @return WP_HTML_Token Newly-created virtual token. |
|
6255 */ |
|
6256 private function insert_virtual_node( $token_name, $bookmark_name = null ): WP_HTML_Token { |
|
6257 $here = $this->bookmarks[ $this->state->current_token->bookmark_name ]; |
|
6258 $name = $bookmark_name ?? $this->bookmark_token(); |
|
6259 |
|
6260 $this->bookmarks[ $name ] = new WP_HTML_Span( $here->start, 0 ); |
|
6261 |
|
6262 $token = new WP_HTML_Token( $name, $token_name, false ); |
|
6263 $this->insert_html_element( $token ); |
|
6264 return $token; |
|
2254 } |
6265 } |
2255 |
6266 |
2256 /* |
6267 /* |
2257 * HTML Specification Helpers |
6268 * HTML Specification Helpers |
2258 */ |
6269 */ |
2259 |
6270 |
2260 /** |
6271 /** |
6272 * Indicates if the current token is a MathML integration point. |
|
6273 * |
|
6274 * @since 6.7.0 |
|
6275 * |
|
6276 * @see https://html.spec.whatwg.org/#mathml-text-integration-point |
|
6277 * |
|
6278 * @return bool Whether the current token is a MathML integration point. |
|
6279 */ |
|
6280 private function is_mathml_integration_point(): bool { |
|
6281 $current_token = $this->state->current_token; |
|
6282 if ( ! isset( $current_token ) ) { |
|
6283 return false; |
|
6284 } |
|
6285 |
|
6286 if ( 'math' !== $current_token->namespace || 'M' !== $current_token->node_name[0] ) { |
|
6287 return false; |
|
6288 } |
|
6289 |
|
6290 $tag_name = $current_token->node_name; |
|
6291 |
|
6292 return ( |
|
6293 'MI' === $tag_name || |
|
6294 'MO' === $tag_name || |
|
6295 'MN' === $tag_name || |
|
6296 'MS' === $tag_name || |
|
6297 'MTEXT' === $tag_name |
|
6298 ); |
|
6299 } |
|
6300 |
|
6301 /** |
|
6302 * Indicates if the current token is an HTML integration point. |
|
6303 * |
|
6304 * Note that this method must be an instance method with access |
|
6305 * to the current token, since it needs to examine the attributes |
|
6306 * of the currently-matched tag, if it's in the MathML namespace. |
|
6307 * Otherwise it would be required to scan the HTML and ensure that |
|
6308 * no other accounting is overlooked. |
|
6309 * |
|
6310 * @since 6.7.0 |
|
6311 * |
|
6312 * @see https://html.spec.whatwg.org/#html-integration-point |
|
6313 * |
|
6314 * @return bool Whether the current token is an HTML integration point. |
|
6315 */ |
|
6316 private function is_html_integration_point(): bool { |
|
6317 $current_token = $this->state->current_token; |
|
6318 if ( ! isset( $current_token ) ) { |
|
6319 return false; |
|
6320 } |
|
6321 |
|
6322 if ( 'html' === $current_token->namespace ) { |
|
6323 return false; |
|
6324 } |
|
6325 |
|
6326 $tag_name = $current_token->node_name; |
|
6327 |
|
6328 if ( 'svg' === $current_token->namespace ) { |
|
6329 return ( |
|
6330 'DESC' === $tag_name || |
|
6331 'FOREIGNOBJECT' === $tag_name || |
|
6332 'TITLE' === $tag_name |
|
6333 ); |
|
6334 } |
|
6335 |
|
6336 if ( 'math' === $current_token->namespace ) { |
|
6337 if ( 'ANNOTATION-XML' !== $tag_name ) { |
|
6338 return false; |
|
6339 } |
|
6340 |
|
6341 $encoding = $this->get_attribute( 'encoding' ); |
|
6342 |
|
6343 return ( |
|
6344 is_string( $encoding ) && |
|
6345 ( |
|
6346 0 === strcasecmp( $encoding, 'application/xhtml+xml' ) || |
|
6347 0 === strcasecmp( $encoding, 'text/html' ) |
|
6348 ) |
|
6349 ); |
|
6350 } |
|
6351 |
|
6352 $this->bail( 'Should not have reached end of HTML Integration Point detection: check HTML API code.' ); |
|
6353 // This unnecessary return prevents tools from inaccurately reporting type errors. |
|
6354 return false; |
|
6355 } |
|
6356 |
|
6357 /** |
|
2261 * Returns whether an element of a given name is in the HTML special category. |
6358 * Returns whether an element of a given name is in the HTML special category. |
2262 * |
6359 * |
2263 * @since 6.4.0 |
6360 * @since 6.4.0 |
2264 * |
6361 * |
2265 * @see https://html.spec.whatwg.org/#special |
6362 * @see https://html.spec.whatwg.org/#special |
2266 * |
6363 * |
2267 * @param string $tag_name Name of element to check. |
6364 * @param WP_HTML_Token|string $tag_name Node to check, or only its name if in the HTML namespace. |
2268 * @return bool Whether the element of the given name is in the special category. |
6365 * @return bool Whether the element of the given name is in the special category. |
2269 */ |
6366 */ |
2270 public static function is_special( $tag_name ) { |
6367 public static function is_special( $tag_name ): bool { |
2271 $tag_name = strtoupper( $tag_name ); |
6368 if ( is_string( $tag_name ) ) { |
6369 $tag_name = strtoupper( $tag_name ); |
|
6370 } else { |
|
6371 $tag_name = 'html' === $tag_name->namespace |
|
6372 ? strtoupper( $tag_name->node_name ) |
|
6373 : "{$tag_name->namespace} {$tag_name->node_name}"; |
|
6374 } |
|
2272 |
6375 |
2273 return ( |
6376 return ( |
2274 'ADDRESS' === $tag_name || |
6377 'ADDRESS' === $tag_name || |
2275 'APPLET' === $tag_name || |
6378 'APPLET' === $tag_name || |
2276 'AREA' === $tag_name || |
6379 'AREA' === $tag_name || |
2354 'UL' === $tag_name || |
6457 'UL' === $tag_name || |
2355 'WBR' === $tag_name || |
6458 'WBR' === $tag_name || |
2356 'XMP' === $tag_name || |
6459 'XMP' === $tag_name || |
2357 |
6460 |
2358 // MathML. |
6461 // MathML. |
2359 'MI' === $tag_name || |
6462 'math MI' === $tag_name || |
2360 'MO' === $tag_name || |
6463 'math MO' === $tag_name || |
2361 'MN' === $tag_name || |
6464 'math MN' === $tag_name || |
2362 'MS' === $tag_name || |
6465 'math MS' === $tag_name || |
2363 'MTEXT' === $tag_name || |
6466 'math MTEXT' === $tag_name || |
2364 'ANNOTATION-XML' === $tag_name || |
6467 'math ANNOTATION-XML' === $tag_name || |
2365 |
6468 |
2366 // SVG. |
6469 // SVG. |
2367 'FOREIGNOBJECT' === $tag_name || |
6470 'svg DESC' === $tag_name || |
2368 'DESC' === $tag_name || |
6471 'svg FOREIGNOBJECT' === $tag_name || |
2369 'TITLE' === $tag_name |
6472 'svg TITLE' === $tag_name |
2370 ); |
6473 ); |
2371 } |
6474 } |
2372 |
6475 |
2373 /** |
6476 /** |
2374 * Returns whether a given element is an HTML Void Element |
6477 * Returns whether a given element is an HTML Void Element |
2380 * @see https://html.spec.whatwg.org/#void-elements |
6483 * @see https://html.spec.whatwg.org/#void-elements |
2381 * |
6484 * |
2382 * @param string $tag_name Name of HTML tag to check. |
6485 * @param string $tag_name Name of HTML tag to check. |
2383 * @return bool Whether the given tag is an HTML Void Element. |
6486 * @return bool Whether the given tag is an HTML Void Element. |
2384 */ |
6487 */ |
2385 public static function is_void( $tag_name ) { |
6488 public static function is_void( $tag_name ): bool { |
2386 $tag_name = strtoupper( $tag_name ); |
6489 $tag_name = strtoupper( $tag_name ); |
2387 |
6490 |
2388 return ( |
6491 return ( |
2389 'AREA' === $tag_name || |
6492 'AREA' === $tag_name || |
2390 'BASE' === $tag_name || |
6493 'BASE' === $tag_name || |
2405 'TRACK' === $tag_name || |
6508 'TRACK' === $tag_name || |
2406 'WBR' === $tag_name |
6509 'WBR' === $tag_name |
2407 ); |
6510 ); |
2408 } |
6511 } |
2409 |
6512 |
6513 /** |
|
6514 * Gets an encoding from a given string. |
|
6515 * |
|
6516 * This is an algorithm defined in the WHAT-WG specification. |
|
6517 * |
|
6518 * Example: |
|
6519 * |
|
6520 * 'UTF-8' === self::get_encoding( 'utf8' ); |
|
6521 * 'UTF-8' === self::get_encoding( " \tUTF-8 " ); |
|
6522 * null === self::get_encoding( 'UTF-7' ); |
|
6523 * null === self::get_encoding( 'utf8; charset=' ); |
|
6524 * |
|
6525 * @see https://encoding.spec.whatwg.org/#concept-encoding-get |
|
6526 * |
|
6527 * @todo As this parser only supports UTF-8, only the UTF-8 |
|
6528 * encodings are detected. Add more as desired, but the |
|
6529 * parser will bail on non-UTF-8 encodings. |
|
6530 * |
|
6531 * @since 6.7.0 |
|
6532 * |
|
6533 * @param string $label A string which may specify a known encoding. |
|
6534 * @return string|null Known encoding if matched, otherwise null. |
|
6535 */ |
|
6536 protected static function get_encoding( string $label ): ?string { |
|
6537 /* |
|
6538 * > Remove any leading and trailing ASCII whitespace from label. |
|
6539 */ |
|
6540 $label = trim( $label, " \t\f\r\n" ); |
|
6541 |
|
6542 /* |
|
6543 * > If label is an ASCII case-insensitive match for any of the labels listed in the |
|
6544 * > table below, then return the corresponding encoding; otherwise return failure. |
|
6545 */ |
|
6546 switch ( strtolower( $label ) ) { |
|
6547 case 'unicode-1-1-utf-8': |
|
6548 case 'unicode11utf8': |
|
6549 case 'unicode20utf8': |
|
6550 case 'utf-8': |
|
6551 case 'utf8': |
|
6552 case 'x-unicode20utf8': |
|
6553 return 'UTF-8'; |
|
6554 |
|
6555 default: |
|
6556 return null; |
|
6557 } |
|
6558 } |
|
6559 |
|
2410 /* |
6560 /* |
2411 * Constants that would pollute the top of the class if they were found there. |
6561 * Constants that would pollute the top of the class if they were found there. |
2412 */ |
6562 */ |
2413 |
6563 |
2414 /** |
6564 /** |