29 * @param string $search_text Plain string to look for at the start of the attribute value. |
29 * @param string $search_text Plain string to look for at the start of the attribute value. |
30 * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. |
30 * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching. |
31 * Default 'case-sensitive'. |
31 * Default 'case-sensitive'. |
32 * @return bool Whether the attribute value starts with the given string. |
32 * @return bool Whether the attribute value starts with the given string. |
33 */ |
33 */ |
34 public static function attribute_starts_with( $haystack, $search_text, $case_sensitivity = 'case-sensitive' ) { |
34 public static function attribute_starts_with( $haystack, $search_text, $case_sensitivity = 'case-sensitive' ): bool { |
35 $search_length = strlen( $search_text ); |
35 $search_length = strlen( $search_text ); |
36 $loose_case = 'ascii-case-insensitive' === $case_sensitivity; |
36 $loose_case = 'ascii-case-insensitive' === $case_sensitivity; |
37 $haystack_end = strlen( $haystack ); |
37 $haystack_end = strlen( $haystack ); |
38 $search_at = 0; |
38 $search_at = 0; |
39 $haystack_at = 0; |
39 $haystack_at = 0; |
88 * @since 6.6.0 |
88 * @since 6.6.0 |
89 * |
89 * |
90 * @param string $text Text containing raw and non-decoded text node to decode. |
90 * @param string $text Text containing raw and non-decoded text node to decode. |
91 * @return string Decoded UTF-8 value of given text node. |
91 * @return string Decoded UTF-8 value of given text node. |
92 */ |
92 */ |
public static function decode_text_node( $text ): string {
	// Text nodes are decoded with the `data` context; `static::` resolves
	// decode() through late static binding.
	$decoded_text = static::decode( 'data', $text );

	return $decoded_text;
}
96 |
96 |
97 /** |
97 /** |
98 * Returns a string containing the decoded value of a given HTML attribute. |
98 * Returns a string containing the decoded value of a given HTML attribute. |
108 * @since 6.6.0 |
108 * @since 6.6.0 |
109 * |
109 * |
110 * @param string $text Text containing raw and non-decoded attribute value to decode. |
110 * @param string $text Text containing raw and non-decoded attribute value to decode. |
111 * @return string Decoded UTF-8 value of given attribute value. |
111 * @return string Decoded UTF-8 value of given attribute value. |
112 */ |
112 */ |
public static function decode_attribute( $text ): string {
	// Attribute values are decoded with the `attribute` context; `static::`
	// resolves decode() through late static binding.
	$decoded_value = static::decode( 'attribute', $text );

	return $decoded_value;
}
116 |
116 |
117 /** |
117 /** |
118 * Decodes a span of HTML text, depending on the context in which it's found. |
118 * Decodes a span of HTML text, depending on the context in which it's found. |
131 * |
131 * |
132 * @param string $context `attribute` for decoding attribute values, `data` otherwise. |
132 * @param string $context `attribute` for decoding attribute values, `data` otherwise. |
133 * @param string $text Text document containing span of text to decode. |
133 * @param string $text Text document containing span of text to decode. |
134 * @return string Decoded UTF-8 string. |
134 * @return string Decoded UTF-8 string. |
135 */ |
135 */ |
136 public static function decode( $context, $text ) { |
136 public static function decode( $context, $text ): string { |
137 $decoded = ''; |
137 $decoded = ''; |
138 $end = strlen( $text ); |
138 $end = strlen( $text ); |
139 $at = 0; |
139 $at = 0; |
140 $was_at = 0; |
140 $was_at = 0; |
141 |
141 |
142 while ( $at < $end ) { |
142 while ( $at < $end ) { |
143 $next_character_reference_at = strpos( $text, '&', $at ); |
143 $next_character_reference_at = strpos( $text, '&', $at ); |
144 if ( false === $next_character_reference_at || $next_character_reference_at >= $end ) { |
144 if ( false === $next_character_reference_at ) { |
145 break; |
145 break; |
146 } |
146 } |
147 |
147 |
148 $character_reference = self::read_character_reference( $context, $text, $next_character_reference_at, $token_length ); |
148 $character_reference = self::read_character_reference( $context, $text, $next_character_reference_at, $token_length ); |
149 if ( isset( $character_reference ) ) { |
149 if ( isset( $character_reference ) ) { |
193 * 4 === $token_length; // `&not` |
193 * 4 === $token_length; // `&not` |
194 * '∉' === WP_HTML_Decoder::read_character_reference( 'data', '&notin;', 0, $token_length ); |
194 * '∉' === WP_HTML_Decoder::read_character_reference( 'data', '&notin;', 0, $token_length ); |
195 * 7 === $token_length; // `&notin;` |
195 * 7 === $token_length; // `&notin;` |
196 * |
196 * |
197 * @since 6.6.0 |
197 * @since 6.6.0 |
|
198 * |
|
199 * @global WP_Token_Map $html5_named_character_references Mappings for HTML5 named character references. |
198 * |
200 * |
199 * @param string $context `attribute` for decoding attribute values, `data` otherwise. |
201 * @param string $context `attribute` for decoding attribute values, `data` otherwise. |
200 * @param string $text Text document containing span of text to decode. |
202 * @param string $text Text document containing span of text to decode. |
201 * @param int $at Optional. Byte offset into text where span begins, defaults to the beginning (0). |
203 * @param int $at Optional. Byte offset into text where span begins, defaults to the beginning (0). |
202 * @param int &$match_byte_length Optional. Set to byte-length of character reference if provided and if a match |
204 * @param int &$match_byte_length Optional. Set to byte-length of character reference if provided and if a match |
419 * @see https://www.rfc-editor.org/rfc/rfc3629 For the UTF-8 standard. |
421 * @see https://www.rfc-editor.org/rfc/rfc3629 For the UTF-8 standard. |
420 * |
422 * |
421 * @param int $code_point Which code point to convert. |
423 * @param int $code_point Which code point to convert. |
422 * @return string Converted code point, or `�` if invalid. |
424 * @return string Converted code point, or `�` if invalid. |
423 */ |
425 */ |
public static function code_point_to_utf8_bytes( $code_point ): string {
	/*
	 * Pre-check to ensure a valid code point. Zero and negative values,
	 * the surrogate range U+D800–U+DFFF, and anything above U+10FFFF
	 * all yield the replacement character instead of encoded bytes.
	 */
	$invalid = '�';

	if ( $code_point <= 0 ) {
		return $invalid;
	}

	if ( $code_point >= 0xD800 && $code_point <= 0xDFFF ) {
		return $invalid;
	}

	if ( $code_point > 0x10FFFF ) {
		return $invalid;
	}

	// Single byte: the code point is the byte value.
	if ( $code_point <= 0x7F ) {
		return chr( $code_point );
	}

	// Every multi-byte form ends with the low six bits tagged as `10xxxxxx`.
	$last = chr( 0x80 | ( $code_point & 0x3F ) );

	// Two bytes: `110xxxxx 10xxxxxx`.
	if ( $code_point <= 0x7FF ) {
		return chr( 0xC0 | ( $code_point >> 6 ) ) . $last;
	}

	// Bits 6–11 form the second-to-last byte of the three- and four-byte forms.
	$second_to_last = chr( 0x80 | ( ( $code_point >> 6 ) & 0x3F ) );

	// Three bytes: `1110xxxx 10xxxxxx 10xxxxxx`.
	if ( $code_point <= 0xFFFF ) {
		return chr( 0xE0 | ( $code_point >> 12 ) ) . $second_to_last . $last;
	}

	// Four bytes: `11110xxx 10xxxxxx 10xxxxxx 10xxxxxx`.
	// Any values above U+10FFFF are eliminated above in the pre-check.
	$lead   = chr( 0xF0 | ( $code_point >> 18 ) );
	$second = chr( 0x80 | ( ( $code_point >> 12 ) & 0x3F ) );

	return $lead . $second . $second_to_last . $last;
}
461 } |
463 } |