wp/wp-includes/Requests/src/Iri.php
changeset 21 48c4eec2b7e6
equal deleted inserted replaced
20:7b1b88e27a20 21:48c4eec2b7e6
       
     1 <?php
       
     2 /**
       
     3  * IRI parser/serialiser/normaliser
       
     4  *
       
     5  * @package Requests\Utilities
       
     6  */
       
     7 
       
     8 namespace WpOrg\Requests;
       
     9 
       
    10 use WpOrg\Requests\Exception;
       
    11 use WpOrg\Requests\Exception\InvalidArgument;
       
    12 use WpOrg\Requests\Ipv6;
       
    13 use WpOrg\Requests\Port;
       
    14 use WpOrg\Requests\Utility\InputValidator;
       
    15 
       
    16 /**
       
    17  * IRI parser/serialiser/normaliser
       
    18  *
       
    19  * Copyright (c) 2007-2010, Geoffrey Sneddon and Steve Minutillo.
       
    20  * All rights reserved.
       
    21  *
       
    22  * Redistribution and use in source and binary forms, with or without
       
    23  * modification, are permitted provided that the following conditions are met:
       
    24  *
       
    25  *  * Redistributions of source code must retain the above copyright notice,
       
    26  *       this list of conditions and the following disclaimer.
       
    27  *
       
    28  *  * Redistributions in binary form must reproduce the above copyright notice,
       
    29  *       this list of conditions and the following disclaimer in the documentation
       
    30  *       and/or other materials provided with the distribution.
       
    31  *
       
    32  *  * Neither the name of the SimplePie Team nor the names of its contributors
       
    33  *       may be used to endorse or promote products derived from this software
       
    34  *       without specific prior written permission.
       
    35  *
       
    36  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    37  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    38  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    39  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
       
    40  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    41  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    42  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    43  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    44  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    45  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    46  * POSSIBILITY OF SUCH DAMAGE.
       
    47  *
       
    48  * @package Requests\Utilities
       
    49  * @author Geoffrey Sneddon
       
    50  * @author Steve Minutillo
       
    51  * @copyright 2007-2009 Geoffrey Sneddon and Steve Minutillo
       
    52  * @license https://opensource.org/licenses/bsd-license.php
       
    53  * @link http://hg.gsnedders.com/iri/
       
    54  *
       
    55  * @property string $iri IRI we're working with
       
    56  * @property-read string $uri IRI in URI form, {@see \WpOrg\Requests\Iri::to_uri()}
       
    57  * @property string $scheme Scheme part of the IRI
       
    58  * @property string $authority Authority part, formatted for a URI (userinfo + host + port)
       
    59  * @property string $iauthority Authority part of the IRI (userinfo + host + port)
       
    60  * @property string $userinfo Userinfo part, formatted for a URI (after '://' and before '@')
       
    61  * @property string $iuserinfo Userinfo part of the IRI (after '://' and before '@')
       
    62  * @property string $host Host part, formatted for a URI
       
    63  * @property string $ihost Host part of the IRI
       
    64  * @property string $port Port part of the IRI (after ':')
       
    65  * @property string $path Path part, formatted for a URI (after first '/')
       
    66  * @property string $ipath Path part of the IRI (after first '/')
       
    67  * @property string $query Query part, formatted for a URI (after '?')
       
    68  * @property string $iquery Query part of the IRI (after '?')
       
    69  * @property string $fragment Fragment, formatted for a URI (after '#')
       
    70  * @property string $ifragment Fragment part of the IRI (after '#')
       
    71  */
       
    72 class Iri {
       
    73 	/**
       
    74 	 * Scheme
       
    75 	 *
       
    76 	 * @var string|null
       
    77 	 */
       
    78 	protected $scheme = null;
       
    79 
       
    80 	/**
       
    81 	 * User Information
       
    82 	 *
       
    83 	 * @var string|null
       
    84 	 */
       
    85 	protected $iuserinfo = null;
       
    86 
       
    87 	/**
       
    88 	 * ihost
       
    89 	 *
       
    90 	 * @var string|null
       
    91 	 */
       
    92 	protected $ihost = null;
       
    93 
       
    94 	/**
       
    95 	 * Port
       
    96 	 *
       
    97 	 * @var string|null
       
    98 	 */
       
    99 	protected $port = null;
       
   100 
       
   101 	/**
       
   102 	 * ipath
       
   103 	 *
       
   104 	 * @var string
       
   105 	 */
       
   106 	protected $ipath = '';
       
   107 
       
   108 	/**
       
   109 	 * iquery
       
   110 	 *
       
   111 	 * @var string|null
       
   112 	 */
       
   113 	protected $iquery = null;
       
   114 
       
   115 	/**
       
    116 	 * ifragment
       
    117 	 *
       
    118 	 * @var string|null
       
   119 	 */
       
   120 	protected $ifragment = null;
       
   121 
       
   122 	/**
       
   123 	 * Normalization database
       
   124 	 *
       
   125 	 * Each key is the scheme, each value is an array with each key as the IRI
       
   126 	 * part and value as the default value for that part.
       
   127 	 *
       
   128 	 * @var array
       
   129 	 */
       
   130 	protected $normalization = array(
       
   131 		'acap' => array(
       
   132 			'port' => Port::ACAP,
       
   133 		),
       
   134 		'dict' => array(
       
   135 			'port' => Port::DICT,
       
   136 		),
       
   137 		'file' => array(
       
   138 			'ihost' => 'localhost',
       
   139 		),
       
   140 		'http' => array(
       
   141 			'port' => Port::HTTP,
       
   142 		),
       
   143 		'https' => array(
       
   144 			'port' => Port::HTTPS,
       
   145 		),
       
   146 	);
       
   147 
       
   148 	/**
       
   149 	 * Return the entire IRI when you try and read the object as a string
       
   150 	 *
       
   151 	 * @return string
       
   152 	 */
       
   153 	public function __toString() {
       
   154 		return $this->get_iri();
       
   155 	}
       
   156 
       
   157 	/**
       
   158 	 * Overload __set() to provide access via properties
       
   159 	 *
       
   160 	 * @param string $name Property name
       
   161 	 * @param mixed $value Property value
       
   162 	 */
       
   163 	public function __set($name, $value) {
       
   164 		if (method_exists($this, 'set_' . $name)) {
       
   165 			call_user_func(array($this, 'set_' . $name), $value);
       
   166 		}
       
   167 		elseif (
       
   168 			   $name === 'iauthority'
       
   169 			|| $name === 'iuserinfo'
       
   170 			|| $name === 'ihost'
       
   171 			|| $name === 'ipath'
       
   172 			|| $name === 'iquery'
       
   173 			|| $name === 'ifragment'
       
   174 		) {
       
   175 			call_user_func(array($this, 'set_' . substr($name, 1)), $value);
       
   176 		}
       
   177 	}
       
   178 
       
   179 	/**
       
   180 	 * Overload __get() to provide access via properties
       
   181 	 *
       
   182 	 * @param string $name Property name
       
   183 	 * @return mixed
       
   184 	 */
       
   185 	public function __get($name) {
       
   186 		// isset() returns false for null, we don't want to do that
       
   187 		// Also why we use array_key_exists below instead of isset()
       
   188 		$props = get_object_vars($this);
       
   189 
       
   190 		if (
       
   191 			$name === 'iri' ||
       
   192 			$name === 'uri' ||
       
   193 			$name === 'iauthority' ||
       
   194 			$name === 'authority'
       
   195 		) {
       
   196 			$method = 'get_' . $name;
       
   197 			$return = $this->$method();
       
   198 		}
       
   199 		elseif (array_key_exists($name, $props)) {
       
   200 			$return = $this->$name;
       
   201 		}
       
   202 		// host -> ihost
       
   203 		elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
       
   204 			$name = $prop;
       
   205 			$return = $this->$prop;
       
   206 		}
       
   207 		// ischeme -> scheme
       
   208 		elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
       
   209 			$name = $prop;
       
   210 			$return = $this->$prop;
       
   211 		}
       
   212 		else {
       
   213 			trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
       
   214 			$return = null;
       
   215 		}
       
   216 
       
   217 		if ($return === null && isset($this->normalization[$this->scheme][$name])) {
       
   218 			return $this->normalization[$this->scheme][$name];
       
   219 		}
       
   220 		else {
       
   221 			return $return;
       
   222 		}
       
   223 	}
       
   224 
       
   225 	/**
       
   226 	 * Overload __isset() to provide access via properties
       
   227 	 *
       
   228 	 * @param string $name Property name
       
   229 	 * @return bool
       
   230 	 */
       
   231 	public function __isset($name) {
       
   232 		return (method_exists($this, 'get_' . $name) || isset($this->$name));
       
   233 	}
       
   234 
       
   235 	/**
       
   236 	 * Overload __unset() to provide access via properties
       
   237 	 *
       
   238 	 * @param string $name Property name
       
   239 	 */
       
   240 	public function __unset($name) {
       
   241 		if (method_exists($this, 'set_' . $name)) {
       
   242 			call_user_func(array($this, 'set_' . $name), '');
       
   243 		}
       
   244 	}
       
   245 
       
   246 	/**
       
   247 	 * Create a new IRI object, from a specified string
       
   248 	 *
       
   249 	 * @param string|Stringable|null $iri
       
   250 	 *
       
   251 	 * @throws \WpOrg\Requests\Exception\InvalidArgument When the passed $iri argument is not a string, Stringable or null.
       
   252 	 */
       
   253 	public function __construct($iri = null) {
       
   254 		if ($iri !== null && InputValidator::is_string_or_stringable($iri) === false) {
       
   255 			throw InvalidArgument::create(1, '$iri', 'string|Stringable|null', gettype($iri));
       
   256 		}
       
   257 
       
   258 		$this->set_iri($iri);
       
   259 	}
       
   260 
       
   261 	/**
       
   262 	 * Create a new IRI object by resolving a relative IRI
       
   263 	 *
       
   264 	 * Returns false if $base is not absolute, otherwise an IRI.
       
   265 	 *
       
   266 	 * @param \WpOrg\Requests\Iri|string $base (Absolute) Base IRI
       
   267 	 * @param \WpOrg\Requests\Iri|string $relative Relative IRI
       
   268 	 * @return \WpOrg\Requests\Iri|false
       
   269 	 */
       
   270 	public static function absolutize($base, $relative) {
       
   271 		if (!($relative instanceof self)) {
       
   272 			$relative = new self($relative);
       
   273 		}
       
   274 		if (!$relative->is_valid()) {
       
   275 			return false;
       
   276 		}
       
   277 		elseif ($relative->scheme !== null) {
       
   278 			return clone $relative;
       
   279 		}
       
   280 
       
   281 		if (!($base instanceof self)) {
       
   282 			$base = new self($base);
       
   283 		}
       
   284 		if ($base->scheme === null || !$base->is_valid()) {
       
   285 			return false;
       
   286 		}
       
   287 
       
   288 		if ($relative->get_iri() !== '') {
       
   289 			if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
       
   290 				$target = clone $relative;
       
   291 				$target->scheme = $base->scheme;
       
   292 			}
       
   293 			else {
       
   294 				$target = new self;
       
   295 				$target->scheme = $base->scheme;
       
   296 				$target->iuserinfo = $base->iuserinfo;
       
   297 				$target->ihost = $base->ihost;
       
   298 				$target->port = $base->port;
       
   299 				if ($relative->ipath !== '') {
       
   300 					if ($relative->ipath[0] === '/') {
       
   301 						$target->ipath = $relative->ipath;
       
   302 					}
       
   303 					elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
       
   304 						$target->ipath = '/' . $relative->ipath;
       
   305 					}
       
   306 					elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
       
   307 						$target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
       
   308 					}
       
   309 					else {
       
   310 						$target->ipath = $relative->ipath;
       
   311 					}
       
   312 					$target->ipath = $target->remove_dot_segments($target->ipath);
       
   313 					$target->iquery = $relative->iquery;
       
   314 				}
       
   315 				else {
       
   316 					$target->ipath = $base->ipath;
       
   317 					if ($relative->iquery !== null) {
       
   318 						$target->iquery = $relative->iquery;
       
   319 					}
       
   320 					elseif ($base->iquery !== null) {
       
   321 						$target->iquery = $base->iquery;
       
   322 					}
       
   323 				}
       
   324 				$target->ifragment = $relative->ifragment;
       
   325 			}
       
   326 		}
       
   327 		else {
       
   328 			$target = clone $base;
       
   329 			$target->ifragment = null;
       
   330 		}
       
   331 		$target->scheme_normalization();
       
   332 		return $target;
       
   333 	}
       
   334 
       
   335 	/**
       
   336 	 * Parse an IRI into scheme/authority/path/query/fragment segments
       
   337 	 *
       
   338 	 * @param string $iri
       
   339 	 * @return array
       
   340 	 */
       
   341 	protected function parse_iri($iri) {
       
   342 		$iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
       
   343 		$has_match = preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match);
       
   344 		if (!$has_match) {
       
   345 			throw new Exception('Cannot parse supplied IRI', 'iri.cannot_parse', $iri);
       
   346 		}
       
   347 
       
   348 		if ($match[1] === '') {
       
   349 			$match['scheme'] = null;
       
   350 		}
       
   351 		if (!isset($match[3]) || $match[3] === '') {
       
   352 			$match['authority'] = null;
       
   353 		}
       
   354 		if (!isset($match[5])) {
       
   355 			$match['path'] = '';
       
   356 		}
       
   357 		if (!isset($match[6]) || $match[6] === '') {
       
   358 			$match['query'] = null;
       
   359 		}
       
   360 		if (!isset($match[8]) || $match[8] === '') {
       
   361 			$match['fragment'] = null;
       
   362 		}
       
   363 		return $match;
       
   364 	}
       
   365 
       
   366 	/**
       
   367 	 * Remove dot segments from a path
       
   368 	 *
       
   369 	 * @param string $input
       
   370 	 * @return string
       
   371 	 */
       
   372 	protected function remove_dot_segments($input) {
       
   373 		$output = '';
       
   374 		while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
       
   375 			// A: If the input buffer begins with a prefix of "../" or "./",
       
   376 			// then remove that prefix from the input buffer; otherwise,
       
   377 			if (strpos($input, '../') === 0) {
       
   378 				$input = substr($input, 3);
       
   379 			}
       
   380 			elseif (strpos($input, './') === 0) {
       
   381 				$input = substr($input, 2);
       
   382 			}
       
   383 			// B: if the input buffer begins with a prefix of "/./" or "/.",
       
   384 			// where "." is a complete path segment, then replace that prefix
       
   385 			// with "/" in the input buffer; otherwise,
       
   386 			elseif (strpos($input, '/./') === 0) {
       
   387 				$input = substr($input, 2);
       
   388 			}
       
   389 			elseif ($input === '/.') {
       
   390 				$input = '/';
       
   391 			}
       
   392 			// C: if the input buffer begins with a prefix of "/../" or "/..",
       
   393 			// where ".." is a complete path segment, then replace that prefix
       
   394 			// with "/" in the input buffer and remove the last segment and its
       
   395 			// preceding "/" (if any) from the output buffer; otherwise,
       
   396 			elseif (strpos($input, '/../') === 0) {
       
   397 				$input = substr($input, 3);
       
   398 				$output = substr_replace($output, '', (strrpos($output, '/') ?: 0));
       
   399 			}
       
   400 			elseif ($input === '/..') {
       
   401 				$input = '/';
       
   402 				$output = substr_replace($output, '', (strrpos($output, '/') ?: 0));
       
   403 			}
       
   404 			// D: if the input buffer consists only of "." or "..", then remove
       
   405 			// that from the input buffer; otherwise,
       
   406 			elseif ($input === '.' || $input === '..') {
       
   407 				$input = '';
       
   408 			}
       
   409 			// E: move the first path segment in the input buffer to the end of
       
   410 			// the output buffer, including the initial "/" character (if any)
       
   411 			// and any subsequent characters up to, but not including, the next
       
   412 			// "/" character or the end of the input buffer
       
   413 			elseif (($pos = strpos($input, '/', 1)) !== false) {
       
   414 				$output .= substr($input, 0, $pos);
       
   415 				$input = substr_replace($input, '', 0, $pos);
       
   416 			}
       
   417 			else {
       
   418 				$output .= $input;
       
   419 				$input = '';
       
   420 			}
       
   421 		}
       
   422 		return $output . $input;
       
   423 	}
       
   424 
       
   425 	/**
       
   426 	 * Replace invalid character with percent encoding
       
   427 	 *
       
   428 	 * @param string $text Input string
       
   429 	 * @param string $extra_chars Valid characters not in iunreserved or
       
   430 	 *                            iprivate (this is ASCII-only)
       
   431 	 * @param bool $iprivate Allow iprivate
       
   432 	 * @return string
       
   433 	 */
       
    434 	protected function replace_invalid_with_pct_encoding($text, $extra_chars, $iprivate = false) {
       
        		// Overview: after tidying existing %XX escapes, walk the string and
        		// percent-encode every byte that is neither in the allowed ASCII
        		// set nor part of a well-formed UTF-8 sequence for an accepted
        		// code point.
    435 		// Normalize as many pct-encoded sections as possible
       
    436 		$text = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $text);
       
    437 
       
    438 		// Replace invalid percent characters
       
    439 		$text = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $text);
       
    440 
       
    441 		// Add unreserved and % to $extra_chars (the latter is safe because all
       
    442 		// pct-encoded sections are now valid).
       
    443 		$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';
       
    444 
       
    445 		// Now replace any bytes that aren't allowed with their pct-encoded versions
       
    446 		$position = 0;
       
    447 		$strlen = strlen($text);
       
        		// strspn() jumps over the run of bytes that may stay as they are, so
        		// the loop body only ever starts on a byte outside $extra_chars.
    448 		while (($position += strspn($text, $extra_chars, $position)) < $strlen) {
       
    449 			$value = ord($text[$position]);
       
    450 
       
    451 			// Start position
       
    452 			$start = $position;
       
    453 
       
    454 			// By default we are valid
       
    455 			$valid = true;
       
    456 
       
    457 			// No one byte sequences are valid due to the while.
       
    458 			// Two byte sequence:
       
    459 			if (($value & 0xE0) === 0xC0) {
       
    460 				$character = ($value & 0x1F) << 6;
       
    461 				$length = 2;
       
    462 				$remaining = 1;
       
    463 			}
       
    464 			// Three byte sequence:
       
    465 			elseif (($value & 0xF0) === 0xE0) {
       
    466 				$character = ($value & 0x0F) << 12;
       
    467 				$length = 3;
       
    468 				$remaining = 2;
       
    469 			}
       
    470 			// Four byte sequence:
       
    471 			elseif (($value & 0xF8) === 0xF0) {
       
    472 				$character = ($value & 0x07) << 18;
       
    473 				$length = 4;
       
    474 				$remaining = 3;
       
    475 			}
       
    476 			// Invalid byte:
       
    477 			else {
       
    478 				$valid = false;
       
    479 				$length = 1;
       
    480 				$remaining = 0;
       
    481 			}
       
    482 
       
    483 			if ($remaining) {
       
        				// Only decode when the whole sequence fits in what is left of
        				// the string.
    484 				if ($position + $length <= $strlen) {
       
        					// When this loop runs to completion, $position ends up one
        					// past the last byte of the sequence.
    485 					for ($position++; $remaining; $position++) {
       
    486 						$value = ord($text[$position]);
       
    487 
       
    488 						// Check that the byte is valid, then add it to the character:
       
    489 						if (($value & 0xC0) === 0x80) {
       
    490 							$character |= ($value & 0x3F) << (--$remaining * 6);
       
    491 						}
       
    492 						// If it is invalid, count the sequence as invalid and reprocess the current byte:
       
    493 						else {
       
    494 							$valid = false;
       
    495 							$position--;
       
    496 							break;
       
    497 						}
       
    498 					}
       
    499 				}
       
        				// The sequence is cut off by the end of the string: everything
        				// from $start up to the final byte is treated as invalid.
    500 				else {
       
    501 					$position = $strlen - 1;
       
    502 					$valid = false;
       
    503 				}
       
    504 			}
       
    505 
       
    506 			// Percent encode anything invalid or not in ucschar
       
        			// (&& binds tighter than ||, and !$valid short-circuits first, so
        			// $character is never read for a lone invalid byte.)
    507 			if (
       
    508 				// Invalid sequences
       
    509 				!$valid
       
    510 				// Non-shortest form sequences are invalid
       
    511 				|| $length > 1 && $character <= 0x7F
       
    512 				|| $length > 2 && $character <= 0x7FF
       
    513 				|| $length > 3 && $character <= 0xFFFF
       
    514 				// Outside of range of ucschar codepoints
       
    515 				// Noncharacters
       
    516 				|| ($character & 0xFFFE) === 0xFFFE
       
    517 				|| $character >= 0xFDD0 && $character <= 0xFDEF
       
    518 				|| (
       
    519 					// Everything else not in ucschar
       
    520 					   $character > 0xD7FF && $character < 0xF900
       
    521 					|| $character < 0xA0
       
    522 					|| $character > 0xEFFFD
       
    523 				)
       
    524 				&& (
       
    525 					// Everything not in iprivate, if it applies
       
    526 					   !$iprivate
       
    527 					|| $character < 0xE000
       
    528 					|| $character > 0x10FFFD
       
    529 				)
       
    530 			) {
       
    531 				// If we were a character, pretend we weren't, but rather an error.
       
        				// (A fully decoded character leaves $position one past its last
        				// byte; stepping back makes the loop below cover exactly its bytes.)
    532 				if ($valid) {
       
    533 					$position--;
       
    534 				}
       
    535 
       
        				// Each byte grows from one character to three ("%XX"), so the
        				// loop index, the end position and the cached length all move
        				// forward by two per replacement.
    536 				for ($j = $start; $j <= $position; $j++) {
       
    537 					$text = substr_replace($text, sprintf('%%%02X', ord($text[$j])), $j, 1);
       
    538 					$j += 2;
       
    539 					$position += 2;
       
    540 					$strlen += 2;
       
    541 				}
       
    542 			}
       
    543 		}
       
    544 
       
    545 		return $text;
       
    546 	}
       
   547 
       
   548 	/**
       
   549 	 * Callback function for preg_replace_callback.
       
   550 	 *
       
   551 	 * Removes sequences of percent encoded bytes that represent UTF-8
       
   552 	 * encoded characters in iunreserved
       
   553 	 *
       
   554 	 * @param array $regex_match PCRE match
       
   555 	 * @return string Replacement
       
   556 	 */
       
    557 	protected function remove_iunreserved_percent_encoded($regex_match) {
       
        		// Overview: decode the run of %XX escapes byte by byte as UTF-8.
        		// A sequence that forms an accepted code point is emitted as raw
        		// bytes; anything else is kept percent-encoded with upper-case hex.
    558 		// As we just have valid percent encoded sequences we can just explode
       
    559 		// and ignore the first member of the returned array (an empty string).
       
    560 		$bytes = explode('%', $regex_match[0]);
       
    561 
       
    562 		// Initialize the new string (this is what will be returned) and that
       
    563 		// there are no bytes remaining in the current sequence (unsurprising
       
    564 		// at the first byte!).
       
    565 		$string = '';
       
    566 		$remaining = 0;
       
    567 
       
    568 		// Loop over each and every byte, and set $value to its value
       
    569 		for ($i = 1, $len = count($bytes); $i < $len; $i++) {
       
    570 			$value = hexdec($bytes[$i]);
       
    571 
       
    572 			// If we're the first byte of sequence:
       
    573 			if (!$remaining) {
       
    574 				// Start position
       
    575 				$start = $i;
       
    576 
       
    577 				// By default we are valid
       
    578 				$valid = true;
       
    579 
       
    580 				// One byte sequence:
       
    581 				if ($value <= 0x7F) {
       
    582 					$character = $value;
       
    583 					$length = 1;
       
    584 				}
       
    585 				// Two byte sequence:
       
    586 				elseif (($value & 0xE0) === 0xC0) {
       
    587 					$character = ($value & 0x1F) << 6;
       
    588 					$length = 2;
       
    589 					$remaining = 1;
       
    590 				}
       
    591 				// Three byte sequence:
       
    592 				elseif (($value & 0xF0) === 0xE0) {
       
    593 					$character = ($value & 0x0F) << 12;
       
    594 					$length = 3;
       
    595 					$remaining = 2;
       
    596 				}
       
    597 				// Four byte sequence:
       
    598 				elseif (($value & 0xF8) === 0xF0) {
       
    599 					$character = ($value & 0x07) << 18;
       
    600 					$length = 4;
       
    601 					$remaining = 3;
       
    602 				}
       
    603 				// Invalid byte:
       
        				// ($length is not assigned here; the !$valid test below
        				// short-circuits before $length or $character is read.)
    604 				else {
       
    605 					$valid = false;
       
    606 					$remaining = 0;
       
    607 				}
       
    608 			}
       
    609 			// Continuation byte:
       
    610 			else {
       
    611 				// Check that the byte is valid, then add it to the character:
       
    612 				if (($value & 0xC0) === 0x80) {
       
    613 					$remaining--;
       
    614 					$character |= ($value & 0x3F) << ($remaining * 6);
       
    615 				}
       
    616 				// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
       
        				// (Stepping $i back excludes this byte from the flush below;
        				// the loop increment then revisits it.)
    617 				else {
       
    618 					$valid = false;
       
    619 					$remaining = 0;
       
    620 					$i--;
       
    621 				}
       
    622 			}
       
    623 
       
    624 			// If we've reached the end of the current byte sequence, append it to $string
       
    625 			if (!$remaining) {
       
    626 				// Percent encode anything invalid or not in iunreserved
       
    627 				if (
       
    628 					// Invalid sequences
       
    629 					!$valid
       
    630 					// Non-shortest form sequences are invalid
       
    631 					|| $length > 1 && $character <= 0x7F
       
    632 					|| $length > 2 && $character <= 0x7FF
       
    633 					|| $length > 3 && $character <= 0xFFFF
       
    634 					// Outside of range of iunreserved codepoints
       
    635 					|| $character < 0x2D
       
    636 					|| $character > 0xEFFFD
       
    637 					// Noncharacters
       
    638 					|| ($character & 0xFFFE) === 0xFFFE
       
    639 					|| $character >= 0xFDD0 && $character <= 0xFDEF
       
    640 					// Everything else not in iunreserved (this is all BMP)
       
    641 					|| $character === 0x2F
       
    642 					|| $character > 0x39 && $character < 0x41
       
    643 					|| $character > 0x5A && $character < 0x61
       
    644 					|| $character > 0x7A && $character < 0x7E
       
    645 					|| $character > 0x7E && $character < 0xA0
       
    646 					|| $character > 0xD7FF && $character < 0xF900
       
    647 				) {
       
        					// Keep the escapes, normalising the hex digits to upper case.
    648 					for ($j = $start; $j <= $i; $j++) {
       
    649 						$string .= '%' . strtoupper($bytes[$j]);
       
    650 					}
       
    651 				}
       
        				// Accepted code point: emit the raw bytes instead of the escapes.
    652 				else {
       
    653 					for ($j = $start; $j <= $i; $j++) {
       
    654 						$string .= chr(hexdec($bytes[$j]));
       
    655 					}
       
    656 				}
       
    657 			}
       
    658 		}
       
    659 
       
    660 		// If we have any bytes left over they are invalid (i.e., we are
       
    661 		// mid-way through a multi-byte sequence)
       
    662 		if ($remaining) {
       
    663 			for ($j = $start; $j < $len; $j++) {
       
    664 				$string .= '%' . strtoupper($bytes[$j]);
       
    665 			}
       
    666 		}
       
    667 
       
    668 		return $string;
       
    669 	}
       
   670 
       
   671 	protected function scheme_normalization() {
       
   672 		if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) {
       
   673 			$this->iuserinfo = null;
       
   674 		}
       
   675 		if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) {
       
   676 			$this->ihost = null;
       
   677 		}
       
   678 		if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) {
       
   679 			$this->port = null;
       
   680 		}
       
   681 		if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) {
       
   682 			$this->ipath = '';
       
   683 		}
       
   684 		if (isset($this->ihost) && empty($this->ipath)) {
       
   685 			$this->ipath = '/';
       
   686 		}
       
   687 		if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) {
       
   688 			$this->iquery = null;
       
   689 		}
       
   690 		if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) {
       
   691 			$this->ifragment = null;
       
   692 		}
       
   693 	}
       
   694 
       
   695 	/**
       
   696 	 * Check if the object represents a valid IRI. This needs to be done on each
       
   697 	 * call as some things change depending on another part of the IRI.
       
   698 	 *
       
   699 	 * @return bool
       
   700 	 */
       
   701 	public function is_valid() {
       
   702 		$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
       
   703 		if ($this->ipath !== '' &&
       
   704 			(
       
   705 				$isauthority && $this->ipath[0] !== '/' ||
       
   706 				(
       
   707 					$this->scheme === null &&
       
   708 					!$isauthority &&
       
   709 					strpos($this->ipath, ':') !== false &&
       
   710 					(strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/'))
       
   711 				)
       
   712 			)
       
   713 		) {
       
   714 			return false;
       
   715 		}
       
   716 
       
   717 		return true;
       
   718 	}
       
   719 
       
   720 	public function __wakeup() {
       
   721 		$class_props = get_class_vars( __CLASS__ );
       
   722 		$string_props = array( 'scheme', 'iuserinfo', 'ihost', 'port', 'ipath', 'iquery', 'ifragment' );
       
   723 		$array_props = array( 'normalization' );
       
   724 		foreach ( $class_props as $prop => $default_value ) {
       
   725 			if ( in_array( $prop, $string_props, true ) && ! is_string( $this->$prop ) ) {
       
   726 				throw new UnexpectedValueException();
       
   727 			} elseif ( in_array( $prop, $array_props, true ) && ! is_array( $this->$prop ) ) {
       
   728 				throw new UnexpectedValueException();
       
   729 			}
       
   730 			$this->$prop = null;
       
   731 		}
       
   732 	}
       
   733 
       
   734 	/**
       
   735 	 * Set the entire IRI. Returns true on success, false on failure (if there
       
   736 	 * are any invalid characters).
       
   737 	 *
       
   738 	 * @param string $iri
       
   739 	 * @return bool
       
   740 	 */
       
   741 	protected function set_iri($iri) {
       
   742 		static $cache;
       
   743 		if (!$cache) {
       
   744 			$cache = array();
       
   745 		}
       
   746 
       
   747 		if ($iri === null) {
       
   748 			return true;
       
   749 		}
       
   750 
       
   751 		$iri = (string) $iri;
       
   752 
       
   753 		if (isset($cache[$iri])) {
       
   754 			list($this->scheme,
       
   755 				 $this->iuserinfo,
       
   756 				 $this->ihost,
       
   757 				 $this->port,
       
   758 				 $this->ipath,
       
   759 				 $this->iquery,
       
   760 				 $this->ifragment,
       
   761 				 $return) = $cache[$iri];
       
   762 			return $return;
       
   763 		}
       
   764 
       
   765 		$parsed = $this->parse_iri($iri);
       
   766 
       
   767 		$return = $this->set_scheme($parsed['scheme'])
       
   768 			&& $this->set_authority($parsed['authority'])
       
   769 			&& $this->set_path($parsed['path'])
       
   770 			&& $this->set_query($parsed['query'])
       
   771 			&& $this->set_fragment($parsed['fragment']);
       
   772 
       
   773 		$cache[$iri] = array($this->scheme,
       
   774 							 $this->iuserinfo,
       
   775 							 $this->ihost,
       
   776 							 $this->port,
       
   777 							 $this->ipath,
       
   778 							 $this->iquery,
       
   779 							 $this->ifragment,
       
   780 							 $return);
       
   781 		return $return;
       
   782 	}
       
   783 
       
   784 	/**
       
   785 	 * Set the scheme. Returns true on success, false on failure (if there are
       
   786 	 * any invalid characters).
       
   787 	 *
       
   788 	 * @param string $scheme
       
   789 	 * @return bool
       
   790 	 */
       
   791 	protected function set_scheme($scheme) {
       
   792 		if ($scheme === null) {
       
   793 			$this->scheme = null;
       
   794 		}
       
   795 		elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
       
   796 			$this->scheme = null;
       
   797 			return false;
       
   798 		}
       
   799 		else {
       
   800 			$this->scheme = strtolower($scheme);
       
   801 		}
       
   802 		return true;
       
   803 	}
       
   804 
       
   805 	/**
       
   806 	 * Set the authority. Returns true on success, false on failure (if there are
       
   807 	 * any invalid characters).
       
   808 	 *
       
   809 	 * @param string $authority
       
   810 	 * @return bool
       
   811 	 */
       
   812 	protected function set_authority($authority) {
       
   813 		static $cache;
       
   814 		if (!$cache) {
       
   815 			$cache = array();
       
   816 		}
       
   817 
       
   818 		if ($authority === null) {
       
   819 			$this->iuserinfo = null;
       
   820 			$this->ihost = null;
       
   821 			$this->port = null;
       
   822 			return true;
       
   823 		}
       
   824 		if (isset($cache[$authority])) {
       
   825 			list($this->iuserinfo,
       
   826 				 $this->ihost,
       
   827 				 $this->port,
       
   828 				 $return) = $cache[$authority];
       
   829 
       
   830 			return $return;
       
   831 		}
       
   832 
       
   833 		$remaining = $authority;
       
   834 		if (($iuserinfo_end = strrpos($remaining, '@')) !== false) {
       
   835 			$iuserinfo = substr($remaining, 0, $iuserinfo_end);
       
   836 			$remaining = substr($remaining, $iuserinfo_end + 1);
       
   837 		}
       
   838 		else {
       
   839 			$iuserinfo = null;
       
   840 		}
       
   841 
       
   842 		if (($port_start = strpos($remaining, ':', (strpos($remaining, ']') ?: 0))) !== false) {
       
   843 			$port = substr($remaining, $port_start + 1);
       
   844 			if ($port === false || $port === '') {
       
   845 				$port = null;
       
   846 			}
       
   847 			$remaining = substr($remaining, 0, $port_start);
       
   848 		}
       
   849 		else {
       
   850 			$port = null;
       
   851 		}
       
   852 
       
   853 		$return = $this->set_userinfo($iuserinfo) &&
       
   854 				  $this->set_host($remaining) &&
       
   855 				  $this->set_port($port);
       
   856 
       
   857 		$cache[$authority] = array($this->iuserinfo,
       
   858 								   $this->ihost,
       
   859 								   $this->port,
       
   860 								   $return);
       
   861 
       
   862 		return $return;
       
   863 	}
       
   864 
       
   865 	/**
       
   866 	 * Set the iuserinfo.
       
   867 	 *
       
   868 	 * @param string $iuserinfo
       
   869 	 * @return bool
       
   870 	 */
       
   871 	protected function set_userinfo($iuserinfo) {
       
   872 		if ($iuserinfo === null) {
       
   873 			$this->iuserinfo = null;
       
   874 		}
       
   875 		else {
       
   876 			$this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
       
   877 			$this->scheme_normalization();
       
   878 		}
       
   879 
       
   880 		return true;
       
   881 	}
       
   882 
       
   883 	/**
       
   884 	 * Set the ihost. Returns true on success, false on failure (if there are
       
   885 	 * any invalid characters).
       
   886 	 *
       
   887 	 * @param string $ihost
       
   888 	 * @return bool
       
   889 	 */
       
   890 	protected function set_host($ihost) {
       
   891 		if ($ihost === null) {
       
   892 			$this->ihost = null;
       
   893 			return true;
       
   894 		}
       
   895 		if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
       
   896 			if (Ipv6::check_ipv6(substr($ihost, 1, -1))) {
       
   897 				$this->ihost = '[' . Ipv6::compress(substr($ihost, 1, -1)) . ']';
       
   898 			}
       
   899 			else {
       
   900 				$this->ihost = null;
       
   901 				return false;
       
   902 			}
       
   903 		}
       
   904 		else {
       
   905 			$ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
       
   906 
       
   907 			// Lowercase, but ignore pct-encoded sections (as they should
       
   908 			// remain uppercase). This must be done after the previous step
       
   909 			// as that can add unescaped characters.
       
   910 			$position = 0;
       
   911 			$strlen = strlen($ihost);
       
   912 			while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) {
       
   913 				if ($ihost[$position] === '%') {
       
   914 					$position += 3;
       
   915 				}
       
   916 				else {
       
   917 					$ihost[$position] = strtolower($ihost[$position]);
       
   918 					$position++;
       
   919 				}
       
   920 			}
       
   921 
       
   922 			$this->ihost = $ihost;
       
   923 		}
       
   924 
       
   925 		$this->scheme_normalization();
       
   926 
       
   927 		return true;
       
   928 	}
       
   929 
       
   930 	/**
       
   931 	 * Set the port. Returns true on success, false on failure (if there are
       
   932 	 * any invalid characters).
       
   933 	 *
       
   934 	 * @param string $port
       
   935 	 * @return bool
       
   936 	 */
       
   937 	protected function set_port($port) {
       
   938 		if ($port === null) {
       
   939 			$this->port = null;
       
   940 			return true;
       
   941 		}
       
   942 
       
   943 		if (strspn($port, '0123456789') === strlen($port)) {
       
   944 			$this->port = (int) $port;
       
   945 			$this->scheme_normalization();
       
   946 			return true;
       
   947 		}
       
   948 
       
   949 		$this->port = null;
       
   950 		return false;
       
   951 	}
       
   952 
       
   953 	/**
       
   954 	 * Set the ipath.
       
   955 	 *
       
   956 	 * @param string $ipath
       
   957 	 * @return bool
       
   958 	 */
       
   959 	protected function set_path($ipath) {
       
   960 		static $cache;
       
   961 		if (!$cache) {
       
   962 			$cache = array();
       
   963 		}
       
   964 
       
   965 		$ipath = (string) $ipath;
       
   966 
       
   967 		if (isset($cache[$ipath])) {
       
   968 			$this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
       
   969 		}
       
   970 		else {
       
   971 			$valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
       
   972 			$removed = $this->remove_dot_segments($valid);
       
   973 
       
   974 			$cache[$ipath] = array($valid, $removed);
       
   975 			$this->ipath = ($this->scheme !== null) ? $removed : $valid;
       
   976 		}
       
   977 		$this->scheme_normalization();
       
   978 		return true;
       
   979 	}
       
   980 
       
   981 	/**
       
   982 	 * Set the iquery.
       
   983 	 *
       
   984 	 * @param string $iquery
       
   985 	 * @return bool
       
   986 	 */
       
   987 	protected function set_query($iquery) {
       
   988 		if ($iquery === null) {
       
   989 			$this->iquery = null;
       
   990 		}
       
   991 		else {
       
   992 			$this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
       
   993 			$this->scheme_normalization();
       
   994 		}
       
   995 		return true;
       
   996 	}
       
   997 
       
   998 	/**
       
   999 	 * Set the ifragment.
       
  1000 	 *
       
  1001 	 * @param string $ifragment
       
  1002 	 * @return bool
       
  1003 	 */
       
  1004 	protected function set_fragment($ifragment) {
       
  1005 		if ($ifragment === null) {
       
  1006 			$this->ifragment = null;
       
  1007 		}
       
  1008 		else {
       
  1009 			$this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
       
  1010 			$this->scheme_normalization();
       
  1011 		}
       
  1012 		return true;
       
  1013 	}
       
  1014 
       
  1015 	/**
       
  1016 	 * Convert an IRI to a URI (or parts thereof)
       
  1017 	 *
       
  1018 	 * @param string|bool $iri IRI to convert (or false from {@see \WpOrg\Requests\Iri::get_iri()})
       
  1019 	 * @return string|false URI if IRI is valid, false otherwise.
       
  1020 	 */
       
  1021 	protected function to_uri($iri) {
       
  1022 		if (!is_string($iri)) {
       
  1023 			return false;
       
  1024 		}
       
  1025 
       
  1026 		static $non_ascii;
       
  1027 		if (!$non_ascii) {
       
  1028 			$non_ascii = implode('', range("\x80", "\xFF"));
       
  1029 		}
       
  1030 
       
  1031 		$position = 0;
       
  1032 		$strlen = strlen($iri);
       
  1033 		while (($position += strcspn($iri, $non_ascii, $position)) < $strlen) {
       
  1034 			$iri = substr_replace($iri, sprintf('%%%02X', ord($iri[$position])), $position, 1);
       
  1035 			$position += 3;
       
  1036 			$strlen += 2;
       
  1037 		}
       
  1038 
       
  1039 		return $iri;
       
  1040 	}
       
  1041 
       
  1042 	/**
       
  1043 	 * Get the complete IRI
       
  1044 	 *
       
  1045 	 * @return string|false
       
  1046 	 */
       
  1047 	protected function get_iri() {
       
  1048 		if (!$this->is_valid()) {
       
  1049 			return false;
       
  1050 		}
       
  1051 
       
  1052 		$iri = '';
       
  1053 		if ($this->scheme !== null) {
       
  1054 			$iri .= $this->scheme . ':';
       
  1055 		}
       
  1056 		if (($iauthority = $this->get_iauthority()) !== null) {
       
  1057 			$iri .= '//' . $iauthority;
       
  1058 		}
       
  1059 		$iri .= $this->ipath;
       
  1060 		if ($this->iquery !== null) {
       
  1061 			$iri .= '?' . $this->iquery;
       
  1062 		}
       
  1063 		if ($this->ifragment !== null) {
       
  1064 			$iri .= '#' . $this->ifragment;
       
  1065 		}
       
  1066 
       
  1067 		return $iri;
       
  1068 	}
       
  1069 
       
  1070 	/**
       
  1071 	 * Get the complete URI
       
  1072 	 *
       
  1073 	 * @return string
       
  1074 	 */
       
  1075 	protected function get_uri() {
       
  1076 		return $this->to_uri($this->get_iri());
       
  1077 	}
       
  1078 
       
  1079 	/**
       
  1080 	 * Get the complete iauthority
       
  1081 	 *
       
  1082 	 * @return string|null
       
  1083 	 */
       
  1084 	protected function get_iauthority() {
       
  1085 		if ($this->iuserinfo === null && $this->ihost === null && $this->port === null) {
       
  1086 			return null;
       
  1087 		}
       
  1088 
       
  1089 		$iauthority = '';
       
  1090 		if ($this->iuserinfo !== null) {
       
  1091 			$iauthority .= $this->iuserinfo . '@';
       
  1092 		}
       
  1093 		if ($this->ihost !== null) {
       
  1094 			$iauthority .= $this->ihost;
       
  1095 		}
       
  1096 		if ($this->port !== null) {
       
  1097 			$iauthority .= ':' . $this->port;
       
  1098 		}
       
  1099 		return $iauthority;
       
  1100 	}
       
  1101 
       
  1102 	/**
       
  1103 	 * Get the complete authority
       
  1104 	 *
       
  1105 	 * @return string
       
  1106 	 */
       
  1107 	protected function get_authority() {
       
  1108 		$iauthority = $this->get_iauthority();
       
  1109 		if (is_string($iauthority)) {
       
  1110 			return $this->to_uri($iauthority);
       
  1111 		}
       
  1112 		else {
       
  1113 			return $iauthority;
       
  1114 		}
       
  1115 	}
       
  1116 }