wp/wp-includes/html-api/class-wp-html-decoder.php
changeset 22 8c2e4d02f4ef
parent 21 48c4eec2b7e6
equal deleted inserted replaced
21:48c4eec2b7e6 22:8c2e4d02f4ef
    29 	 * @param string $search_text      Does the attribute value start with this plain string.
    29 	 * @param string $search_text      Does the attribute value start with this plain string.
    30 	 * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching.
    30 	 * @param string $case_sensitivity Optional. Pass 'ascii-case-insensitive' to ignore ASCII case when matching.
    31 	 *                                 Default 'case-sensitive'.
    31 	 *                                 Default 'case-sensitive'.
    32 	 * @return bool Whether the attribute value starts with the given string.
    32 	 * @return bool Whether the attribute value starts with the given string.
    33 	 */
    33 	 */
    34 	public static function attribute_starts_with( $haystack, $search_text, $case_sensitivity = 'case-sensitive' ) {
    34 	public static function attribute_starts_with( $haystack, $search_text, $case_sensitivity = 'case-sensitive' ): bool {
    35 		$search_length = strlen( $search_text );
    35 		$search_length = strlen( $search_text );
    36 		$loose_case    = 'ascii-case-insensitive' === $case_sensitivity;
    36 		$loose_case    = 'ascii-case-insensitive' === $case_sensitivity;
    37 		$haystack_end  = strlen( $haystack );
    37 		$haystack_end  = strlen( $haystack );
    38 		$search_at     = 0;
    38 		$search_at     = 0;
    39 		$haystack_at   = 0;
    39 		$haystack_at   = 0;
    88 	 * @since 6.6.0
    88 	 * @since 6.6.0
    89 	 *
    89 	 *
    90 	 * @param string $text Text containing raw and non-decoded text node to decode.
    90 	 * @param string $text Text containing raw and non-decoded text node to decode.
    91 	 * @return string Decoded UTF-8 value of given text node.
    91 	 * @return string Decoded UTF-8 value of given text node.
    92 	 */
    92 	 */
    93 	public static function decode_text_node( $text ) {
    93 	public static function decode_text_node( $text ): string {
    94 		return static::decode( 'data', $text );
    94 		return static::decode( 'data', $text );
    95 	}
    95 	}
    96 
    96 
    97 	/**
    97 	/**
    98 	 * Returns a string containing the decoded value of a given HTML attribute.
    98 	 * Returns a string containing the decoded value of a given HTML attribute.
   108 	 * @since 6.6.0
   108 	 * @since 6.6.0
   109 	 *
   109 	 *
   110 	 * @param string $text Text containing raw and non-decoded attribute value to decode.
   110 	 * @param string $text Text containing raw and non-decoded attribute value to decode.
   111 	 * @return string Decoded UTF-8 value of given attribute value.
   111 	 * @return string Decoded UTF-8 value of given attribute value.
   112 	 */
   112 	 */
   113 	public static function decode_attribute( $text ) {
   113 	public static function decode_attribute( $text ): string {
   114 		return static::decode( 'attribute', $text );
   114 		return static::decode( 'attribute', $text );
   115 	}
   115 	}
   116 
   116 
   117 	/**
   117 	/**
   118 	 * Decodes a span of HTML text, depending on the context in which it's found.
   118 	 * Decodes a span of HTML text, depending on the context in which it's found.
   131 	 *
   131 	 *
   132 	 * @param string $context `attribute` for decoding attribute values, `data` otherwise.
   132 	 * @param string $context `attribute` for decoding attribute values, `data` otherwise.
   133 	 * @param string $text    Text document containing span of text to decode.
   133 	 * @param string $text    Text document containing span of text to decode.
   134 	 * @return string Decoded UTF-8 string.
   134 	 * @return string Decoded UTF-8 string.
   135 	 */
   135 	 */
   136 	public static function decode( $context, $text ) {
   136 	public static function decode( $context, $text ): string {
   137 		$decoded = '';
   137 		$decoded = '';
   138 		$end     = strlen( $text );
   138 		$end     = strlen( $text );
   139 		$at      = 0;
   139 		$at      = 0;
   140 		$was_at  = 0;
   140 		$was_at  = 0;
   141 
   141 
   142 		while ( $at < $end ) {
   142 		while ( $at < $end ) {
   143 			$next_character_reference_at = strpos( $text, '&', $at );
   143 			$next_character_reference_at = strpos( $text, '&', $at );
   144 			if ( false === $next_character_reference_at || $next_character_reference_at >= $end ) {
   144 			if ( false === $next_character_reference_at ) {
   145 				break;
   145 				break;
   146 			}
   146 			}
   147 
   147 
   148 			$character_reference = self::read_character_reference( $context, $text, $next_character_reference_at, $token_length );
   148 			$character_reference = self::read_character_reference( $context, $text, $next_character_reference_at, $token_length );
   149 			if ( isset( $character_reference ) ) {
   149 			if ( isset( $character_reference ) ) {
   193 	 *     4    === $token_length; // `&not`
   193 	 *     4    === $token_length; // `&not`
   194 	 *     '∉'  === WP_HTML_Decoder::read_character_reference( 'data', '&notin;', 0, $token_length );
   194 	 *     '∉'  === WP_HTML_Decoder::read_character_reference( 'data', '&notin;', 0, $token_length );
   195 	 *     7    === $token_length; // `&notin;`
   195 	 *     7    === $token_length; // `&notin;`
   196 	 *
   196 	 *
   197 	 * @since 6.6.0
   197 	 * @since 6.6.0
       
   198 	 *
       
   199 	 * @global WP_Token_Map $html5_named_character_references Mappings for HTML5 named character references.
   198 	 *
   200 	 *
   199 	 * @param string $context            `attribute` for decoding attribute values, `data` otherwise.
   201 	 * @param string $context            `attribute` for decoding attribute values, `data` otherwise.
   200 	 * @param string $text               Text document containing span of text to decode.
   202 	 * @param string $text               Text document containing span of text to decode.
   201 	 * @param int    $at                 Optional. Byte offset into text where span begins, defaults to the beginning (0).
   203 	 * @param int    $at                 Optional. Byte offset into text where span begins, defaults to the beginning (0).
   202 	 * @param int    &$match_byte_length Optional. Set to byte-length of character reference if provided and if a match
   204 	 * @param int    &$match_byte_length Optional. Set to byte-length of character reference if provided and if a match
   419 	 * @see https://www.rfc-editor.org/rfc/rfc3629 For the UTF-8 standard.
   421 	 * @see https://www.rfc-editor.org/rfc/rfc3629 For the UTF-8 standard.
   420 	 *
   422 	 *
   421 	 * @param int $code_point Which code point to convert.
   423 	 * @param int $code_point Which code point to convert.
   422 	 * @return string Converted code point, or `�` if invalid.
   424 	 * @return string Converted code point, or `�` if invalid.
   423 	 */
   425 	 */
   424 	public static function code_point_to_utf8_bytes( $code_point ) {
   426 	public static function code_point_to_utf8_bytes( $code_point ): string {
   425 		// Pre-check to ensure a valid code point.
   427 		// Pre-check to ensure a valid code point.
   426 		if (
   428 		if (
   427 			$code_point <= 0 ||
   429 			$code_point <= 0 ||
   428 			( $code_point >= 0xD800 && $code_point <= 0xDFFF ) ||
   430 			( $code_point >= 0xD800 && $code_point <= 0xDFFF ) ||
   429 			$code_point > 0x10FFFF
   431 			$code_point > 0x10FFFF
   434 		if ( $code_point <= 0x7F ) {
   436 		if ( $code_point <= 0x7F ) {
   435 			return chr( $code_point );
   437 			return chr( $code_point );
   436 		}
   438 		}
   437 
   439 
   438 		if ( $code_point <= 0x7FF ) {
   440 		if ( $code_point <= 0x7FF ) {
   439 			$byte1 = ( $code_point >> 6 ) | 0xC0;
   441 			$byte1 = chr( ( $code_point >> 6 ) | 0xC0 );
   440 			$byte2 = $code_point & 0x3F | 0x80;
   442 			$byte2 = chr( $code_point & 0x3F | 0x80 );
   441 
   443 
   442 			return pack( 'CC', $byte1, $byte2 );
   444 			return "{$byte1}{$byte2}";
   443 		}
   445 		}
   444 
   446 
   445 		if ( $code_point <= 0xFFFF ) {
   447 		if ( $code_point <= 0xFFFF ) {
   446 			$byte1 = ( $code_point >> 12 ) | 0xE0;
   448 			$byte1 = chr( ( $code_point >> 12 ) | 0xE0 );
   447 			$byte2 = ( $code_point >> 6 ) & 0x3F | 0x80;
   449 			$byte2 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
   448 			$byte3 = $code_point & 0x3F | 0x80;
   450 			$byte3 = chr( $code_point & 0x3F | 0x80 );
   449 
   451 
   450 			return pack( 'CCC', $byte1, $byte2, $byte3 );
   452 			return "{$byte1}{$byte2}{$byte3}";
   451 		}
   453 		}
   452 
   454 
   453 		// Any values above U+10FFFF are eliminated above in the pre-check.
   455 		// Any values above U+10FFFF are eliminated above in the pre-check.
   454 		$byte1 = ( $code_point >> 18 ) | 0xF0;
   456 		$byte1 = chr( ( $code_point >> 18 ) | 0xF0 );
   455 		$byte2 = ( $code_point >> 12 ) & 0x3F | 0x80;
   457 		$byte2 = chr( ( $code_point >> 12 ) & 0x3F | 0x80 );
   456 		$byte3 = ( $code_point >> 6 ) & 0x3F | 0x80;
   458 		$byte3 = chr( ( $code_point >> 6 ) & 0x3F | 0x80 );
   457 		$byte4 = $code_point & 0x3F | 0x80;
   459 		$byte4 = chr( $code_point & 0x3F | 0x80 );
   458 
   460 
   459 		return pack( 'CCCC', $byte1, $byte2, $byte3, $byte4 );
   461 		return "{$byte1}{$byte2}{$byte3}{$byte4}";
   460 	}
   462 	}
   461 }
   463 }