wp/wp-includes/rest-api/endpoints/class-wp-rest-url-details-controller.php
changeset 19 3d72ae0968f4
child 21 48c4eec2b7e6
equal deleted inserted replaced
18:be944660c56a 19:3d72ae0968f4
       
     1 <?php
       
     2 /**
       
     3  * REST API: WP_REST_URL_Details_Controller class
       
     4  *
       
     5  * @package WordPress
       
     6  * @subpackage REST_API
       
     7  * @since 5.9.0
       
     8  */
       
     9 
       
    10 /**
       
    11  * Controller which provides REST endpoint for retrieving information
       
    12  * from a remote site's HTML response.
       
    13  *
       
    14  * @since 5.9.0
       
    15  *
       
    16  * @see WP_REST_Controller
       
    17  */
       
    18 class WP_REST_URL_Details_Controller extends WP_REST_Controller {
       
    19 
       
    20 	/**
       
    21 	 * Constructs the controller.
       
    22 	 *
       
    23 	 * @since 5.9.0
       
    24 	 */
       
    25 	public function __construct() {
       
    26 		$this->namespace = 'wp-block-editor/v1';
       
    27 		$this->rest_base = 'url-details';
       
    28 	}
       
    29 
       
    30 	/**
       
    31 	 * Registers the necessary REST API routes.
       
    32 	 *
       
    33 	 * @since 5.9.0
       
    34 	 */
       
    35 	public function register_routes() {
       
    36 		register_rest_route(
       
    37 			$this->namespace,
       
    38 			'/' . $this->rest_base,
       
    39 			array(
       
    40 				array(
       
    41 					'methods'             => WP_REST_Server::READABLE,
       
    42 					'callback'            => array( $this, 'parse_url_details' ),
       
    43 					'args'                => array(
       
    44 						'url' => array(
       
    45 							'required'          => true,
       
    46 							'description'       => __( 'The URL to process.' ),
       
    47 							'validate_callback' => 'wp_http_validate_url',
       
    48 							'sanitize_callback' => 'esc_url_raw',
       
    49 							'type'              => 'string',
       
    50 							'format'            => 'uri',
       
    51 						),
       
    52 					),
       
    53 					'permission_callback' => array( $this, 'permissions_check' ),
       
    54 					'schema'              => array( $this, 'get_public_item_schema' ),
       
    55 				),
       
    56 			)
       
    57 		);
       
    58 	}
       
    59 
       
    60 	/**
       
    61 	 * Retrieves the item's schema, conforming to JSON Schema.
       
    62 	 *
       
    63 	 * @since 5.9.0
       
    64 	 *
       
    65 	 * @return array Item schema data.
       
    66 	 */
       
    67 	public function get_item_schema() {
       
    68 		if ( $this->schema ) {
       
    69 			return $this->add_additional_fields_schema( $this->schema );
       
    70 		}
       
    71 
       
    72 		$this->schema = array(
       
    73 			'$schema'    => 'http://json-schema.org/draft-04/schema#',
       
    74 			'title'      => 'url-details',
       
    75 			'type'       => 'object',
       
    76 			'properties' => array(
       
    77 				'title'       => array(
       
    78 					'description' => sprintf(
       
    79 						/* translators: %s: HTML title tag. */
       
    80 						__( 'The contents of the %s element from the URL.' ),
       
    81 						'<title>'
       
    82 					),
       
    83 					'type'        => 'string',
       
    84 					'context'     => array( 'view', 'edit', 'embed' ),
       
    85 					'readonly'    => true,
       
    86 				),
       
    87 				'icon'        => array(
       
    88 					'description' => sprintf(
       
    89 						/* translators: %s: HTML link tag. */
       
    90 						__( 'The favicon image link of the %s element from the URL.' ),
       
    91 						'<link rel="icon">'
       
    92 					),
       
    93 					'type'        => 'string',
       
    94 					'format'      => 'uri',
       
    95 					'context'     => array( 'view', 'edit', 'embed' ),
       
    96 					'readonly'    => true,
       
    97 				),
       
    98 				'description' => array(
       
    99 					'description' => sprintf(
       
   100 						/* translators: %s: HTML meta tag. */
       
   101 						__( 'The content of the %s element from the URL.' ),
       
   102 						'<meta name="description">'
       
   103 					),
       
   104 					'type'        => 'string',
       
   105 					'context'     => array( 'view', 'edit', 'embed' ),
       
   106 					'readonly'    => true,
       
   107 				),
       
   108 				'image'       => array(
       
   109 					'description' => sprintf(
       
   110 						/* translators: 1: HTML meta tag, 2: HTML meta tag. */
       
   111 						__( 'The Open Graph image link of the %1$s or %2$s element from the URL.' ),
       
   112 						'<meta property="og:image">',
       
   113 						'<meta property="og:image:url">'
       
   114 					),
       
   115 					'type'        => 'string',
       
   116 					'format'      => 'uri',
       
   117 					'context'     => array( 'view', 'edit', 'embed' ),
       
   118 					'readonly'    => true,
       
   119 				),
       
   120 			),
       
   121 		);
       
   122 
       
   123 		return $this->add_additional_fields_schema( $this->schema );
       
   124 	}
       
   125 
       
   126 	/**
       
   127 	 * Retrieves the contents of the title tag from the HTML response.
       
   128 	 *
       
   129 	 * @since 5.9.0
       
   130 	 *
       
   131 	 * @param WP_REST_REQUEST $request Full details about the request.
       
   132 	 * @return WP_REST_Response|WP_Error The parsed details as a response object. WP_Error if there are errors.
       
   133 	 */
       
   134 	public function parse_url_details( $request ) {
       
   135 		$url = untrailingslashit( $request['url'] );
       
   136 
       
   137 		if ( empty( $url ) ) {
       
   138 			return new WP_Error( 'rest_invalid_url', __( 'Invalid URL' ), array( 'status' => 404 ) );
       
   139 		}
       
   140 
       
   141 		// Transient per URL.
       
   142 		$cache_key = $this->build_cache_key_for_url( $url );
       
   143 
       
   144 		// Attempt to retrieve cached response.
       
   145 		$cached_response = $this->get_cache( $cache_key );
       
   146 
       
   147 		if ( ! empty( $cached_response ) ) {
       
   148 			$remote_url_response = $cached_response;
       
   149 		} else {
       
   150 			$remote_url_response = $this->get_remote_url( $url );
       
   151 
       
   152 			// Exit if we don't have a valid body or it's empty.
       
   153 			if ( is_wp_error( $remote_url_response ) || empty( $remote_url_response ) ) {
       
   154 				return $remote_url_response;
       
   155 			}
       
   156 
       
   157 			// Cache the valid response.
       
   158 			$this->set_cache( $cache_key, $remote_url_response );
       
   159 		}
       
   160 
       
   161 		$html_head     = $this->get_document_head( $remote_url_response );
       
   162 		$meta_elements = $this->get_meta_with_content_elements( $html_head );
       
   163 
       
   164 		$data = $this->add_additional_fields_to_object(
       
   165 			array(
       
   166 				'title'       => $this->get_title( $html_head ),
       
   167 				'icon'        => $this->get_icon( $html_head, $url ),
       
   168 				'description' => $this->get_description( $meta_elements ),
       
   169 				'image'       => $this->get_image( $meta_elements, $url ),
       
   170 			),
       
   171 			$request
       
   172 		);
       
   173 
       
   174 		// Wrap the data in a response object.
       
   175 		$response = rest_ensure_response( $data );
       
   176 
       
   177 		/**
       
   178 		 * Filters the URL data for the response.
       
   179 		 *
       
   180 		 * @since 5.9.0
       
   181 		 *
       
   182 		 * @param WP_REST_Response $response            The response object.
       
   183 		 * @param string           $url                 The requested URL.
       
   184 		 * @param WP_REST_Request  $request             Request object.
       
   185 		 * @param string           $remote_url_response HTTP response body from the remote URL.
       
   186 		 */
       
   187 		return apply_filters( 'rest_prepare_url_details', $response, $url, $request, $remote_url_response );
       
   188 	}
       
   189 
       
   190 	/**
       
   191 	 * Checks whether a given request has permission to read remote URLs.
       
   192 	 *
       
   193 	 * @since 5.9.0
       
   194 	 *
       
   195 	 * @return WP_Error|bool True if the request has permission, else WP_Error.
       
   196 	 */
       
   197 	public function permissions_check() {
       
   198 		if ( current_user_can( 'edit_posts' ) ) {
       
   199 			return true;
       
   200 		}
       
   201 
       
   202 		foreach ( get_post_types( array( 'show_in_rest' => true ), 'objects' ) as $post_type ) {
       
   203 			if ( current_user_can( $post_type->cap->edit_posts ) ) {
       
   204 				return true;
       
   205 			}
       
   206 		}
       
   207 
       
   208 		return new WP_Error(
       
   209 			'rest_cannot_view_url_details',
       
   210 			__( 'Sorry, you are not allowed to process remote URLs.' ),
       
   211 			array( 'status' => rest_authorization_required_code() )
       
   212 		);
       
   213 	}
       
   214 
       
   215 	/**
       
   216 	 * Retrieves the document title from a remote URL.
       
   217 	 *
       
   218 	 * @since 5.9.0
       
   219 	 *
       
   220 	 * @param string $url The website URL whose HTML to access.
       
   221 	 * @return string|WP_Error The HTTP response from the remote URL on success.
       
   222 	 *                         WP_Error if no response or no content.
       
   223 	 */
       
   224 	private function get_remote_url( $url ) {
       
   225 
       
   226 		/*
       
   227 		 * Provide a modified UA string to workaround web properties which block WordPress "Pingbacks".
       
   228 		 * Why? The UA string used for pingback requests contains `WordPress/` which is very similar
       
   229 		 * to that used as the default UA string by the WP HTTP API. Therefore requests from this
       
   230 		 * REST endpoint are being unintentionally blocked as they are misidentified as pingback requests.
       
   231 		 * By slightly modifying the UA string, but still retaining the "WordPress" identification (via "WP")
       
   232 		 * we are able to work around this issue.
       
   233 		 * Example UA string: `WP-URLDetails/5.9-alpha-51389 (+http://localhost:8888)`.
       
   234 		*/
       
   235 		$modified_user_agent = 'WP-URLDetails/' . get_bloginfo( 'version' ) . ' (+' . get_bloginfo( 'url' ) . ')';
       
   236 
       
   237 		$args = array(
       
   238 			'limit_response_size' => 150 * KB_IN_BYTES,
       
   239 			'user-agent'          => $modified_user_agent,
       
   240 		);
       
   241 
       
   242 		/**
       
   243 		 * Filters the HTTP request args for URL data retrieval.
       
   244 		 *
       
   245 		 * Can be used to adjust response size limit and other WP_Http::request() args.
       
   246 		 *
       
   247 		 * @since 5.9.0
       
   248 		 *
       
   249 		 * @param array  $args Arguments used for the HTTP request.
       
   250 		 * @param string $url  The attempted URL.
       
   251 		 */
       
   252 		$args = apply_filters( 'rest_url_details_http_request_args', $args, $url );
       
   253 
       
   254 		$response = wp_safe_remote_get( $url, $args );
       
   255 
       
   256 		if ( WP_Http::OK !== wp_remote_retrieve_response_code( $response ) ) {
       
   257 			// Not saving the error response to cache since the error might be temporary.
       
   258 			return new WP_Error(
       
   259 				'no_response',
       
   260 				__( 'URL not found. Response returned a non-200 status code for this URL.' ),
       
   261 				array( 'status' => WP_Http::NOT_FOUND )
       
   262 			);
       
   263 		}
       
   264 
       
   265 		$remote_body = wp_remote_retrieve_body( $response );
       
   266 
       
   267 		if ( empty( $remote_body ) ) {
       
   268 			return new WP_Error(
       
   269 				'no_content',
       
   270 				__( 'Unable to retrieve body from response at this URL.' ),
       
   271 				array( 'status' => WP_Http::NOT_FOUND )
       
   272 			);
       
   273 		}
       
   274 
       
   275 		return $remote_body;
       
   276 	}
       
   277 
       
   278 	/**
       
   279 	 * Parses the title tag contents from the provided HTML.
       
   280 	 *
       
   281 	 * @since 5.9.0
       
   282 	 *
       
   283 	 * @param string $html The HTML from the remote website at URL.
       
   284 	 * @return string The title tag contents on success. Empty string if not found.
       
   285 	 */
       
   286 	private function get_title( $html ) {
       
   287 		$pattern = '#<title[^>]*>(.*?)<\s*/\s*title>#is';
       
   288 		preg_match( $pattern, $html, $match_title );
       
   289 
       
   290 		if ( empty( $match_title[1] ) || ! is_string( $match_title[1] ) ) {
       
   291 			return '';
       
   292 		}
       
   293 
       
   294 		$title = trim( $match_title[1] );
       
   295 
       
   296 		return $this->prepare_metadata_for_output( $title );
       
   297 	}
       
   298 
       
   299 	/**
       
   300 	 * Parses the site icon from the provided HTML.
       
   301 	 *
       
   302 	 * @since 5.9.0
       
   303 	 *
       
   304 	 * @param string $html The HTML from the remote website at URL.
       
   305 	 * @param string $url  The target website URL.
       
   306 	 * @return string The icon URI on success. Empty string if not found.
       
   307 	 */
       
   308 	private function get_icon( $html, $url ) {
       
   309 		// Grab the icon's link element.
       
   310 		$pattern = '#<link\s[^>]*rel=(?:[\"\']??)\s*(?:icon|shortcut icon|icon shortcut)\s*(?:[\"\']??)[^>]*\/?>#isU';
       
   311 		preg_match( $pattern, $html, $element );
       
   312 		if ( empty( $element[0] ) || ! is_string( $element[0] ) ) {
       
   313 			return '';
       
   314 		}
       
   315 		$element = trim( $element[0] );
       
   316 
       
   317 		// Get the icon's href value.
       
   318 		$pattern = '#href=([\"\']??)([^\" >]*?)\\1[^>]*#isU';
       
   319 		preg_match( $pattern, $element, $icon );
       
   320 		if ( empty( $icon[2] ) || ! is_string( $icon[2] ) ) {
       
   321 			return '';
       
   322 		}
       
   323 		$icon = trim( $icon[2] );
       
   324 
       
   325 		// If the icon is a data URL, return it.
       
   326 		$parsed_icon = parse_url( $icon );
       
   327 		if ( isset( $parsed_icon['scheme'] ) && 'data' === $parsed_icon['scheme'] ) {
       
   328 			return $icon;
       
   329 		}
       
   330 
       
   331 		// Attempt to convert relative URLs to absolute.
       
   332 		if ( ! is_string( $url ) || '' === $url ) {
       
   333 			return $icon;
       
   334 		}
       
   335 		$parsed_url = parse_url( $url );
       
   336 		if ( isset( $parsed_url['scheme'] ) && isset( $parsed_url['host'] ) ) {
       
   337 			$root_url = $parsed_url['scheme'] . '://' . $parsed_url['host'] . '/';
       
   338 			$icon     = WP_Http::make_absolute_url( $icon, $root_url );
       
   339 		}
       
   340 
       
   341 		return $icon;
       
   342 	}
       
   343 
       
   344 	/**
       
   345 	 * Parses the meta description from the provided HTML.
       
   346 	 *
       
   347 	 * @since 5.9.0
       
   348 	 *
       
   349 	 * @param array $meta_elements {
       
   350 	 *     A multi-dimensional indexed array on success, else empty array.
       
   351 	 *
       
   352 	 *     @type string[] $0 Meta elements with a content attribute.
       
   353 	 *     @type string[] $1 Content attribute's opening quotation mark.
       
   354 	 *     @type string[] $2 Content attribute's value for each meta element.
       
   355 	 * }
       
   356 	 * @return string The meta description contents on success. Empty string if not found.
       
   357 	 */
       
   358 	private function get_description( $meta_elements ) {
       
   359 		// Bail out if there are no meta elements.
       
   360 		if ( empty( $meta_elements[0] ) ) {
       
   361 			return '';
       
   362 		}
       
   363 
       
   364 		$description = $this->get_metadata_from_meta_element(
       
   365 			$meta_elements,
       
   366 			'name',
       
   367 			'(?:description|og:description)'
       
   368 		);
       
   369 
       
   370 		// Bail out if description not found.
       
   371 		if ( '' === $description ) {
       
   372 			return '';
       
   373 		}
       
   374 
       
   375 		return $this->prepare_metadata_for_output( $description );
       
   376 	}
       
   377 
       
   378 	/**
       
   379 	 * Parses the Open Graph (OG) Image from the provided HTML.
       
   380 	 *
       
   381 	 * See: https://ogp.me/.
       
   382 	 *
       
   383 	 * @since 5.9.0
       
   384 	 *
       
   385 	 * @param array  $meta_elements {
       
   386 	 *     A multi-dimensional indexed array on success, else empty array.
       
   387 	 *
       
   388 	 *     @type string[] $0 Meta elements with a content attribute.
       
   389 	 *     @type string[] $1 Content attribute's opening quotation mark.
       
   390 	 *     @type string[] $2 Content attribute's value for each meta element.
       
   391 	 * }
       
   392 	 * @param string $url The target website URL.
       
   393 	 * @return string The OG image on success. Empty string if not found.
       
   394 	 */
       
   395 	private function get_image( $meta_elements, $url ) {
       
   396 		$image = $this->get_metadata_from_meta_element(
       
   397 			$meta_elements,
       
   398 			'property',
       
   399 			'(?:og:image|og:image:url)'
       
   400 		);
       
   401 
       
   402 		// Bail out if image not found.
       
   403 		if ( '' === $image ) {
       
   404 			return '';
       
   405 		}
       
   406 
       
   407 		// Attempt to convert relative URLs to absolute.
       
   408 		$parsed_url = parse_url( $url );
       
   409 		if ( isset( $parsed_url['scheme'] ) && isset( $parsed_url['host'] ) ) {
       
   410 			$root_url = $parsed_url['scheme'] . '://' . $parsed_url['host'] . '/';
       
   411 			$image    = WP_Http::make_absolute_url( $image, $root_url );
       
   412 		}
       
   413 
       
   414 		return $image;
       
   415 	}
       
   416 
       
   417 	/**
       
   418 	 * Prepares the metadata by:
       
   419 	 *    - stripping all HTML tags and tag entities.
       
   420 	 *    - converting non-tag entities into characters.
       
   421 	 *
       
   422 	 * @since 5.9.0
       
   423 	 *
       
   424 	 * @param string $metadata The metadata content to prepare.
       
   425 	 * @return string The prepared metadata.
       
   426 	 */
       
   427 	private function prepare_metadata_for_output( $metadata ) {
       
   428 		$metadata = html_entity_decode( $metadata, ENT_QUOTES, get_bloginfo( 'charset' ) );
       
   429 		$metadata = wp_strip_all_tags( $metadata );
       
   430 		return $metadata;
       
   431 	}
       
   432 
       
   433 	/**
       
   434 	 * Utility function to build cache key for a given URL.
       
   435 	 *
       
   436 	 * @since 5.9.0
       
   437 	 *
       
   438 	 * @param string $url The URL for which to build a cache key.
       
   439 	 * @return string The cache key.
       
   440 	 */
       
   441 	private function build_cache_key_for_url( $url ) {
       
   442 		return 'g_url_details_response_' . md5( $url );
       
   443 	}
       
   444 
       
   445 	/**
       
   446 	 * Utility function to retrieve a value from the cache at a given key.
       
   447 	 *
       
   448 	 * @since 5.9.0
       
   449 	 *
       
   450 	 * @param string $key The cache key.
       
   451 	 * @return mixed The value from the cache.
       
   452 	 */
       
   453 	private function get_cache( $key ) {
       
   454 		return get_site_transient( $key );
       
   455 	}
       
   456 
       
   457 	/**
       
   458 	 * Utility function to cache a given data set at a given cache key.
       
   459 	 *
       
   460 	 * @since 5.9.0
       
   461 	 *
       
   462 	 * @param string $key  The cache key under which to store the value.
       
   463 	 * @param string $data The data to be stored at the given cache key.
       
   464 	 * @return bool True when transient set. False if not set.
       
   465 	 */
       
   466 	private function set_cache( $key, $data = '' ) {
       
   467 		$ttl = HOUR_IN_SECONDS;
       
   468 
       
   469 		/**
       
   470 		 * Filters the cache expiration.
       
   471 		 *
       
   472 		 * Can be used to adjust the time until expiration in seconds for the cache
       
   473 		 * of the data retrieved for the given URL.
       
   474 		 *
       
   475 		 * @since 5.9.0
       
   476 		 *
       
   477 		 * @param int $ttl The time until cache expiration in seconds.
       
   478 		 */
       
   479 		$cache_expiration = apply_filters( 'rest_url_details_cache_expiration', $ttl );
       
   480 
       
   481 		return set_site_transient( $key, $data, $cache_expiration );
       
   482 	}
       
   483 
       
   484 	/**
       
   485 	 * Retrieves the head element section.
       
   486 	 *
       
   487 	 * @since 5.9.0
       
   488 	 *
       
   489 	 * @param string $html The string of HTML to parse.
       
   490 	 * @return string The `<head>..</head>` section on success. Given `$html` if not found.
       
   491 	 */
       
   492 	private function get_document_head( $html ) {
       
   493 		$head_html = $html;
       
   494 
       
   495 		// Find the opening `<head>` tag.
       
   496 		$head_start = strpos( $html, '<head' );
       
   497 		if ( false === $head_start ) {
       
   498 			// Didn't find it. Return the original HTML.
       
   499 			return $html;
       
   500 		}
       
   501 
       
   502 		// Find the closing `</head>` tag.
       
   503 		$head_end = strpos( $head_html, '</head>' );
       
   504 		if ( false === $head_end ) {
       
   505 			// Didn't find it. Find the opening `<body>` tag.
       
   506 			$head_end = strpos( $head_html, '<body' );
       
   507 
       
   508 			// Didn't find it. Return the original HTML.
       
   509 			if ( false === $head_end ) {
       
   510 				return $html;
       
   511 			}
       
   512 		}
       
   513 
       
   514 		// Extract the HTML from opening tag to the closing tag. Then add the closing tag.
       
   515 		$head_html  = substr( $head_html, $head_start, $head_end );
       
   516 		$head_html .= '</head>';
       
   517 
       
   518 		return $head_html;
       
   519 	}
       
   520 
       
   521 	/**
       
   522 	 * Gets all the meta tag elements that have a 'content' attribute.
       
   523 	 *
       
   524 	 * @since 5.9.0
       
   525 	 *
       
   526 	 * @param string $html The string of HTML to be parsed.
       
   527 	 * @return array {
       
   528 	 *     A multi-dimensional indexed array on success, else empty array.
       
   529 	 *
       
   530 	 *     @type string[] $0 Meta elements with a content attribute.
       
   531 	 *     @type string[] $1 Content attribute's opening quotation mark.
       
   532 	 *     @type string[] $2 Content attribute's value for each meta element.
       
   533 	 * }
       
   534 	 */
       
   535 	private function get_meta_with_content_elements( $html ) {
       
   536 		/*
       
   537 		 * Parse all meta elements with a content attribute.
       
   538 		 *
       
   539 		 * Why first search for the content attribute rather than directly searching for name=description element?
       
   540 		 * tl;dr The content attribute's value will be truncated when it contains a > symbol.
       
   541 		 *
       
   542 		 * The content attribute's value (i.e. the description to get) can have HTML in it and be well-formed as
       
   543 		 * it's a string to the browser. Imagine what happens when attempting to match for the name=description
       
   544 		 * first. Hmm, if a > or /> symbol is in the content attribute's value, then it terminates the match
       
   545 		 * as the element's closing symbol. But wait, it's in the content attribute and is not the end of the
       
   546 		 * element. This is a limitation of using regex. It can't determine "wait a minute this is inside of quotation".
       
   547 		 * If this happens, what gets matched is not the entire element or all of the content.
       
   548 		 *
       
   549 		 * Why not search for the name=description and then content="(.*)"?
       
   550 		 * The attribute order could be opposite. Plus, additional attributes may exist including being between
       
   551 		 * the name and content attributes.
       
   552 		 *
       
   553 		 * Why not lookahead?
       
   554 		 * Lookahead is not constrained to stay within the element. The first <meta it finds may not include
       
   555 		 * the name or content, but rather could be from a different element downstream.
       
   556 		 */
       
   557 		$pattern = '#<meta\s' .
       
   558 
       
   559 				/*
       
   560 				 * Allows for additional attributes before the content attribute.
       
   561 				 * Searches for anything other than > symbol.
       
   562 				 */
       
   563 				'[^>]*' .
       
   564 
       
   565 				/*
       
   566 				* Find the content attribute. When found, capture its value (.*).
       
   567 				*
       
   568 				* Allows for (a) single or double quotes and (b) whitespace in the value.
       
   569 				*
       
   570 				* Why capture the opening quotation mark, i.e. (["\']), and then backreference,
       
   571 				* i.e \1, for the closing quotation mark?
       
   572 				* To ensure the closing quotation mark matches the opening one. Why? Attribute values
       
   573 				* can contain quotation marks, such as an apostrophe in the content.
       
   574 				*/
       
   575 				'content=(["\']??)(.*)\1' .
       
   576 
       
   577 				/*
       
   578 				* Allows for additional attributes after the content attribute.
       
   579 				* Searches for anything other than > symbol.
       
   580 				*/
       
   581 				'[^>]*' .
       
   582 
       
   583 				/*
       
   584 				* \/?> searches for the closing > symbol, which can be in either /> or > format.
       
   585 				* # ends the pattern.
       
   586 				*/
       
   587 				'\/?>#' .
       
   588 
       
   589 				/*
       
   590 				* These are the options:
       
   591 				* - i : case insensitive
       
   592 				* - s : allows newline characters for the . match (needed for multiline elements)
       
   593 				* - U means non-greedy matching
       
   594 				*/
       
   595 				'isU';
       
   596 
       
   597 		preg_match_all( $pattern, $html, $elements );
       
   598 
       
   599 		return $elements;
       
   600 	}
       
   601 
       
   602 	/**
       
   603 	 * Gets the metadata from a target meta element.
       
   604 	 *
       
   605 	 * @since 5.9.0
       
   606 	 *
       
   607 	 * @param array  $meta_elements {
       
   608 	 *     A multi-dimensional indexed array on success, else empty array.
       
   609 	 *
       
   610 	 *     @type string[] $0 Meta elements with a content attribute.
       
   611 	 *     @type string[] $1 Content attribute's opening quotation mark.
       
   612 	 *     @type string[] $2 Content attribute's value for each meta element.
       
   613 	 * }
       
   614 	 * @param string $attr       Attribute that identifies the element with the target metadata.
       
   615 	 * @param string $attr_value The attribute's value that identifies the element with the target metadata.
       
   616 	 * @return string The metadata on success. Empty string if not found.
       
   617 	 */
       
   618 	private function get_metadata_from_meta_element( $meta_elements, $attr, $attr_value ) {
       
   619 		// Bail out if there are no meta elements.
       
   620 		if ( empty( $meta_elements[0] ) ) {
       
   621 			return '';
       
   622 		}
       
   623 
       
   624 		$metadata = '';
       
   625 		$pattern  = '#' .
       
   626 				/*
       
   627 				 * Target this attribute and value to find the metadata element.
       
   628 				 *
       
   629 				 * Allows for (a) no, single, double quotes and (b) whitespace in the value.
       
   630 				 *
       
   631 				 * Why capture the opening quotation mark, i.e. (["\']), and then backreference,
       
   632 				 * i.e \1, for the closing quotation mark?
       
   633 				 * To ensure the closing quotation mark matches the opening one. Why? Attribute values
       
   634 				 * can contain quotation marks, such as an apostrophe in the content.
       
   635 				 */
       
   636 				$attr . '=([\"\']??)\s*' . $attr_value . '\s*\1' .
       
   637 
       
   638 				/*
       
   639 				 * These are the options:
       
   640 				 * - i : case insensitive
       
   641 				 * - s : allows newline characters for the . match (needed for multiline elements)
       
   642 				 * - U means non-greedy matching
       
   643 				 */
       
   644 				'#isU';
       
   645 
       
   646 		// Find the metadata element.
       
   647 		foreach ( $meta_elements[0] as $index => $element ) {
       
   648 			preg_match( $pattern, $element, $match );
       
   649 
       
   650 			// This is not the metadata element. Skip it.
       
   651 			if ( empty( $match ) ) {
       
   652 				continue;
       
   653 			}
       
   654 
       
   655 			/*
       
   656 			 * Found the metadata element.
       
   657 			 * Get the metadata from its matching content array.
       
   658 			 */
       
   659 			if ( isset( $meta_elements[2][ $index ] ) && is_string( $meta_elements[2][ $index ] ) ) {
       
   660 				$metadata = trim( $meta_elements[2][ $index ] );
       
   661 			}
       
   662 
       
   663 			break;
       
   664 		}
       
   665 
       
   666 		return $metadata;
       
   667 	}
       
   668 }