wp/wp-includes/Requests/IRI.php
changeset 7 cf61fcea0001
child 18 be944660c56a
equal deleted inserted replaced
6:490d5cc509ed 7:cf61fcea0001
       
     1 <?php
       
     2 /**
       
     3  * IRI parser/serialiser/normaliser
       
     4  *
       
     5  * @package Requests
       
     6  * @subpackage Utilities
       
     7  */
       
     8 
       
     9 /**
       
    10  * IRI parser/serialiser/normaliser
       
    11  *
       
    12  * Copyright (c) 2007-2010, Geoffrey Sneddon and Steve Minutillo.
       
    13  * All rights reserved.
       
    14  *
       
    15  * Redistribution and use in source and binary forms, with or without
       
    16  * modification, are permitted provided that the following conditions are met:
       
    17  *
       
    18  *  * Redistributions of source code must retain the above copyright notice,
       
    19  *       this list of conditions and the following disclaimer.
       
    20  *
       
    21  *  * Redistributions in binary form must reproduce the above copyright notice,
       
    22  *       this list of conditions and the following disclaimer in the documentation
       
    23  *       and/or other materials provided with the distribution.
       
    24  *
       
    25  *  * Neither the name of the SimplePie Team nor the names of its contributors
       
    26  *       may be used to endorse or promote products derived from this software
       
    27  *       without specific prior written permission.
       
    28  *
       
    29  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
       
    30  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
       
    31  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
       
    32  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS AND CONTRIBUTORS BE
       
    33  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    34  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
       
    35  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
       
    36  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
       
    37  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
       
    38  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    39  * POSSIBILITY OF SUCH DAMAGE.
       
    40  *
       
    41  * @package Requests
       
    42  * @subpackage Utilities
       
    43  * @author Geoffrey Sneddon
       
    44  * @author Steve Minutillo
       
    45  * @copyright 2007-2009 Geoffrey Sneddon and Steve Minutillo
       
    46  * @license http://www.opensource.org/licenses/bsd-license.php
       
    47  * @link http://hg.gsnedders.com/iri/
       
    48  *
       
    49  * @property string $iri IRI we're working with
       
    50  * @property-read string $uri IRI in URI form, {@see to_uri}
       
    51  * @property string $scheme Scheme part of the IRI
       
    52  * @property string $authority Authority part, formatted for a URI (userinfo + host + port)
       
    53  * @property string $iauthority Authority part of the IRI (userinfo + host + port)
       
    54  * @property string $userinfo Userinfo part, formatted for a URI (after '://' and before '@')
       
    55  * @property string $iuserinfo Userinfo part of the IRI (after '://' and before '@')
       
    56  * @property string $host Host part, formatted for a URI
       
    57  * @property string $ihost Host part of the IRI
       
    58  * @property string $port Port part of the IRI (after ':')
       
    59  * @property string $path Path part, formatted for a URI (after first '/')
       
    60  * @property string $ipath Path part of the IRI (after first '/')
       
    61  * @property string $query Query part, formatted for a URI (after '?')
       
    62  * @property string $iquery Query part of the IRI (after '?')
       
    63  * @property string $fragment Fragment, formatted for a URI (after '#')
       
    64  * @property string $ifragment Fragment part of the IRI (after '#')
       
    65  */
       
    66 class Requests_IRI {
       
	/**
	 * Scheme part of the IRI, or null when no scheme is set
	 *
	 * set_scheme() stores the value lowercased.
	 *
	 * @var string|null
	 */
	protected $scheme = null;

	/**
	 * User information part of the IRI, or null when absent
	 *
	 * @var string|null
	 */
	protected $iuserinfo = null;

	/**
	 * Host part of the IRI, or null when absent
	 *
	 * @var string|null
	 */
	protected $ihost = null;

	/**
	 * Port, or null when absent
	 *
	 * NOTE(review): the normalization table below holds integer ports and
	 * scheme_normalization() compares them with ===, so this presumably holds
	 * an integer once set - confirm against the port setter.
	 *
	 * @var string
	 */
	protected $port = null;

	/**
	 * Path part of the IRI; the empty string (never null by default) when absent
	 *
	 * @var string
	 */
	protected $ipath = '';

	/**
	 * Query part of the IRI, or null when absent
	 *
	 * @var string|null
	 */
	protected $iquery = null;

	/**
	 * Fragment part of the IRI, or null when absent
	 *
	 * @var string|null
	 */
	protected $ifragment = null;

	/**
	 * Normalization database
	 *
	 * Each key is the scheme, each value is an array with each key as the IRI
	 * part and value as the default value for that part.
	 *
	 * __get() falls back to these defaults when the stored part is null, and
	 * scheme_normalization() resets a part when it equals its default.
	 *
	 * @var array
	 */
	protected $normalization = array(
		'acap' => array(
			'port' => 674
		),
		'dict' => array(
			'port' => 2628
		),
		'file' => array(
			'ihost' => 'localhost'
		),
		'http' => array(
			'port' => 80,
		),
		'https' => array(
			'port' => 443,
		),
	);
       
   139 
       
   140 	/**
       
   141 	 * Return the entire IRI when you try and read the object as a string
       
   142 	 *
       
   143 	 * @return string
       
   144 	 */
       
   145 	public function __toString() {
       
   146 		return $this->get_iri();
       
   147 	}
       
   148 
       
   149 	/**
       
   150 	 * Overload __set() to provide access via properties
       
   151 	 *
       
   152 	 * @param string $name Property name
       
   153 	 * @param mixed $value Property value
       
   154 	 */
       
   155 	public function __set($name, $value) {
       
   156 		if (method_exists($this, 'set_' . $name)) {
       
   157 			call_user_func(array($this, 'set_' . $name), $value);
       
   158 		}
       
   159 		elseif (
       
   160 			   $name === 'iauthority'
       
   161 			|| $name === 'iuserinfo'
       
   162 			|| $name === 'ihost'
       
   163 			|| $name === 'ipath'
       
   164 			|| $name === 'iquery'
       
   165 			|| $name === 'ifragment'
       
   166 		) {
       
   167 			call_user_func(array($this, 'set_' . substr($name, 1)), $value);
       
   168 		}
       
   169 	}
       
   170 
       
   171 	/**
       
   172 	 * Overload __get() to provide access via properties
       
   173 	 *
       
   174 	 * @param string $name Property name
       
   175 	 * @return mixed
       
   176 	 */
       
   177 	public function __get($name) {
       
   178 		// isset() returns false for null, we don't want to do that
       
   179 		// Also why we use array_key_exists below instead of isset()
       
   180 		$props = get_object_vars($this);
       
   181 
       
   182 		if (
       
   183 			$name === 'iri' ||
       
   184 			$name === 'uri' ||
       
   185 			$name === 'iauthority' ||
       
   186 			$name === 'authority'
       
   187 		) {
       
   188 			$method = 'get_' . $name;
       
   189 			$return = $this->$method();
       
   190 		}
       
   191 		elseif (array_key_exists($name, $props)) {
       
   192 			$return = $this->$name;
       
   193 		}
       
   194 		// host -> ihost
       
   195 		elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
       
   196 			$name = $prop;
       
   197 			$return = $this->$prop;
       
   198 		}
       
   199 		// ischeme -> scheme
       
   200 		elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
       
   201 			$name = $prop;
       
   202 			$return = $this->$prop;
       
   203 		}
       
   204 		else {
       
   205 			trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
       
   206 			$return = null;
       
   207 		}
       
   208 
       
   209 		if ($return === null && isset($this->normalization[$this->scheme][$name])) {
       
   210 			return $this->normalization[$this->scheme][$name];
       
   211 		}
       
   212 		else {
       
   213 			return $return;
       
   214 		}
       
   215 	}
       
   216 
       
   217 	/**
       
   218 	 * Overload __isset() to provide access via properties
       
   219 	 *
       
   220 	 * @param string $name Property name
       
   221 	 * @return bool
       
   222 	 */
       
   223 	public function __isset($name) {
       
   224 		return (method_exists($this, 'get_' . $name) || isset($this->$name));
       
   225 	}
       
   226 
       
   227 	/**
       
   228 	 * Overload __unset() to provide access via properties
       
   229 	 *
       
   230 	 * @param string $name Property name
       
   231 	 */
       
   232 	public function __unset($name) {
       
   233 		if (method_exists($this, 'set_' . $name)) {
       
   234 			call_user_func(array($this, 'set_' . $name), '');
       
   235 		}
       
   236 	}
       
   237 
       
   238 	/**
       
   239 	 * Create a new IRI object, from a specified string
       
   240 	 *
       
   241 	 * @param string|null $iri
       
   242 	 */
       
   243 	public function __construct($iri = null) {
       
   244 		$this->set_iri($iri);
       
   245 	}
       
   246 
       
   247 	/**
       
   248 	 * Create a new IRI object by resolving a relative IRI
       
   249 	 *
       
   250 	 * Returns false if $base is not absolute, otherwise an IRI.
       
   251 	 *
       
   252 	 * @param IRI|string $base (Absolute) Base IRI
       
   253 	 * @param IRI|string $relative Relative IRI
       
   254 	 * @return IRI|false
       
   255 	 */
       
   256 	public static function absolutize($base, $relative) {
       
   257 		if (!($relative instanceof Requests_IRI)) {
       
   258 			$relative = new Requests_IRI($relative);
       
   259 		}
       
   260 		if (!$relative->is_valid()) {
       
   261 			return false;
       
   262 		}
       
   263 		elseif ($relative->scheme !== null) {
       
   264 			return clone $relative;
       
   265 		}
       
   266 
       
   267 		if (!($base instanceof Requests_IRI)) {
       
   268 			$base = new Requests_IRI($base);
       
   269 		}
       
   270 		if ($base->scheme === null || !$base->is_valid()) {
       
   271 			return false;
       
   272 		}
       
   273 
       
   274 		if ($relative->get_iri() !== '') {
       
   275 			if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
       
   276 				$target = clone $relative;
       
   277 				$target->scheme = $base->scheme;
       
   278 			}
       
   279 			else {
       
   280 				$target = new Requests_IRI;
       
   281 				$target->scheme = $base->scheme;
       
   282 				$target->iuserinfo = $base->iuserinfo;
       
   283 				$target->ihost = $base->ihost;
       
   284 				$target->port = $base->port;
       
   285 				if ($relative->ipath !== '') {
       
   286 					if ($relative->ipath[0] === '/') {
       
   287 						$target->ipath = $relative->ipath;
       
   288 					}
       
   289 					elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
       
   290 						$target->ipath = '/' . $relative->ipath;
       
   291 					}
       
   292 					elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
       
   293 						$target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
       
   294 					}
       
   295 					else {
       
   296 						$target->ipath = $relative->ipath;
       
   297 					}
       
   298 					$target->ipath = $target->remove_dot_segments($target->ipath);
       
   299 					$target->iquery = $relative->iquery;
       
   300 				}
       
   301 				else {
       
   302 					$target->ipath = $base->ipath;
       
   303 					if ($relative->iquery !== null) {
       
   304 						$target->iquery = $relative->iquery;
       
   305 					}
       
   306 					elseif ($base->iquery !== null) {
       
   307 						$target->iquery = $base->iquery;
       
   308 					}
       
   309 				}
       
   310 				$target->ifragment = $relative->ifragment;
       
   311 			}
       
   312 		}
       
   313 		else {
       
   314 			$target = clone $base;
       
   315 			$target->ifragment = null;
       
   316 		}
       
   317 		$target->scheme_normalization();
       
   318 		return $target;
       
   319 	}
       
   320 
       
   321 	/**
       
   322 	 * Parse an IRI into scheme/authority/path/query/fragment segments
       
   323 	 *
       
   324 	 * @param string $iri
       
   325 	 * @return array
       
   326 	 */
       
   327 	protected function parse_iri($iri) {
       
   328 		$iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
       
   329 		$has_match = preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match);
       
   330 		if (!$has_match) {
       
   331 			throw new Requests_Exception('Cannot parse supplied IRI', 'iri.cannot_parse', $iri);
       
   332 		}
       
   333 
       
   334 		if ($match[1] === '') {
       
   335 			$match['scheme'] = null;
       
   336 		}
       
   337 		if (!isset($match[3]) || $match[3] === '') {
       
   338 			$match['authority'] = null;
       
   339 		}
       
   340 		if (!isset($match[5])) {
       
   341 			$match['path'] = '';
       
   342 		}
       
   343 		if (!isset($match[6]) || $match[6] === '') {
       
   344 			$match['query'] = null;
       
   345 		}
       
   346 		if (!isset($match[8]) || $match[8] === '') {
       
   347 			$match['fragment'] = null;
       
   348 		}
       
   349 		return $match;
       
   350 	}
       
   351 
       
   352 	/**
       
   353 	 * Remove dot segments from a path
       
   354 	 *
       
   355 	 * @param string $input
       
   356 	 * @return string
       
   357 	 */
       
   358 	protected function remove_dot_segments($input) {
       
   359 		$output = '';
       
   360 		while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
       
   361 			// A: If the input buffer begins with a prefix of "../" or "./",
       
   362 			// then remove that prefix from the input buffer; otherwise,
       
   363 			if (strpos($input, '../') === 0) {
       
   364 				$input = substr($input, 3);
       
   365 			}
       
   366 			elseif (strpos($input, './') === 0) {
       
   367 				$input = substr($input, 2);
       
   368 			}
       
   369 			// B: if the input buffer begins with a prefix of "/./" or "/.",
       
   370 			// where "." is a complete path segment, then replace that prefix
       
   371 			// with "/" in the input buffer; otherwise,
       
   372 			elseif (strpos($input, '/./') === 0) {
       
   373 				$input = substr($input, 2);
       
   374 			}
       
   375 			elseif ($input === '/.') {
       
   376 				$input = '/';
       
   377 			}
       
   378 			// C: if the input buffer begins with a prefix of "/../" or "/..",
       
   379 			// where ".." is a complete path segment, then replace that prefix
       
   380 			// with "/" in the input buffer and remove the last segment and its
       
   381 			// preceding "/" (if any) from the output buffer; otherwise,
       
   382 			elseif (strpos($input, '/../') === 0) {
       
   383 				$input = substr($input, 3);
       
   384 				$output = substr_replace($output, '', strrpos($output, '/'));
       
   385 			}
       
   386 			elseif ($input === '/..') {
       
   387 				$input = '/';
       
   388 				$output = substr_replace($output, '', strrpos($output, '/'));
       
   389 			}
       
   390 			// D: if the input buffer consists only of "." or "..", then remove
       
   391 			// that from the input buffer; otherwise,
       
   392 			elseif ($input === '.' || $input === '..') {
       
   393 				$input = '';
       
   394 			}
       
   395 			// E: move the first path segment in the input buffer to the end of
       
   396 			// the output buffer, including the initial "/" character (if any)
       
   397 			// and any subsequent characters up to, but not including, the next
       
   398 			// "/" character or the end of the input buffer
       
   399 			elseif (($pos = strpos($input, '/', 1)) !== false) {
       
   400 				$output .= substr($input, 0, $pos);
       
   401 				$input = substr_replace($input, '', 0, $pos);
       
   402 			}
       
   403 			else {
       
   404 				$output .= $input;
       
   405 				$input = '';
       
   406 			}
       
   407 		}
       
   408 		return $output . $input;
       
   409 	}
       
   410 
       
	/**
	 * Replace invalid character with percent encoding
	 *
	 * Works in three passes: runs of existing percent-escapes are handed to
	 * remove_iunreserved_percent_encoded(); any "%" that is then still not
	 * followed by two hex digits is rewritten as "%25"; finally the string is
	 * scanned byte by byte and every byte that is neither an allowed ASCII
	 * character nor part of an acceptable UTF-8 sequence is replaced by its
	 * "%XX" form.
	 *
	 * @param string $string Input string
	 * @param string $extra_chars Valid characters not in iunreserved or
	 *                            iprivate (this is ASCII-only)
	 * @param bool $iprivate Allow iprivate
	 * @return string
	 */
	protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false) {
		// Normalize as many pct-encoded sections as possible
		$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array(&$this, 'remove_iunreserved_percent_encoded'), $string);

		// Replace invalid percent characters
		$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);

		// Add unreserved and % to $extra_chars (the latter is safe because all
		// pct-encoded sections are now valid).
		$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';

		// Now replace any bytes that aren't allowed with their pct-encoded versions
		$position = 0;
		$strlen = strlen($string);
		// strspn() skips the run of allowed ASCII bytes, so each iteration
		// starts on the first byte that is not in $extra_chars.
		while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
			$value = ord($string[$position]);

			// Start position
			$start = $position;

			// By default we are valid
			$valid = true;

			// No one byte sequences are valid due to the while.
			// Two byte sequence:
			if (($value & 0xE0) === 0xC0) {
				$character = ($value & 0x1F) << 6;
				$length = 2;
				$remaining = 1;
			}
			// Three byte sequence:
			elseif (($value & 0xF0) === 0xE0) {
				$character = ($value & 0x0F) << 12;
				$length = 3;
				$remaining = 2;
			}
			// Four byte sequence:
			elseif (($value & 0xF8) === 0xF0) {
				$character = ($value & 0x07) << 18;
				$length = 4;
				$remaining = 3;
			}
			// Invalid byte:
			else {
				$valid = false;
				$length = 1;
				$remaining = 0;
			}

			if ($remaining) {
				// Only read continuation bytes when the whole sequence fits
				// inside the string.
				if ($position + $length <= $strlen) {
					for ($position++; $remaining; $position++) {
						$value = ord($string[$position]);

						// Check that the byte is valid, then add it to the character:
						if (($value & 0xC0) === 0x80) {
							$character |= ($value & 0x3F) << (--$remaining * 6);
						}
						// If it is invalid, count the sequence as invalid and reprocess the current byte:
						else {
							$valid = false;
							$position--;
							break;
						}
					}
				}
				else {
					// Truncated sequence: every remaining byte up to the end of
					// the string gets encoded.
					$position = $strlen - 1;
					$valid = false;
				}
			}

			// Percent encode anything invalid or not in ucschar
			if (
				// Invalid sequences
				!$valid
				// Non-shortest form sequences are invalid
				|| $length > 1 && $character <= 0x7F
				|| $length > 2 && $character <= 0x7FF
				|| $length > 3 && $character <= 0xFFFF
				// Outside of range of ucschar codepoints
				// Noncharacters
				|| ($character & 0xFFFE) === 0xFFFE
				|| $character >= 0xFDD0 && $character <= 0xFDEF
				|| (
					// Everything else not in ucschar
					   $character > 0xD7FF && $character < 0xF900
					|| $character < 0xA0
					|| $character > 0xEFFFD
				)
				&& (
					// Everything not in iprivate, if it applies
					   !$iprivate
					|| $character < 0xE000
					|| $character > 0x10FFFD
				)
			) {
				// If we were a character, pretend we weren't, but rather an error.
				// (After a complete decode $position is one past the last byte
				// of the sequence; step back so the loop below ends on it.)
				if ($valid) {
					$position--;
				}

				// Each byte becomes a three-character "%XX", so every index and
				// the cached length move forward by two per replacement.
				for ($j = $start; $j <= $position; $j++) {
					$string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
					$j += 2;
					$position += 2;
					$strlen += 2;
				}
			}
		}

		return $string;
	}
       
   533 
       
	/**
	 * Callback function for preg_replace_callback.
	 *
	 * Removes sequences of percent encoded bytes that represent UTF-8
	 * encoded characters in iunreserved
	 *
	 * The matched run of "%XX" escapes is decoded byte by byte as UTF-8. A
	 * sequence that decodes to an acceptable character is emitted as raw
	 * bytes; every other sequence is re-emitted in percent-encoded form with
	 * its hex digits uppercased.
	 *
	 * @param array $match PCRE match
	 * @return string Replacement
	 */
	protected function remove_iunreserved_percent_encoded($match) {
		// As we just have valid percent encoded sequences we can just explode
		// and ignore the first member of the returned array (an empty string).
		$bytes = explode('%', $match[0]);

		// Initialize the new string (this is what will be returned) and that
		// there are no bytes remaining in the current sequence (unsurprising
		// at the first byte!).
		$string = '';
		$remaining = 0;

		// Loop over each and every byte, and set $value to its value
		for ($i = 1, $len = count($bytes); $i < $len; $i++) {
			$value = hexdec($bytes[$i]);

			// If we're the first byte of sequence:
			if (!$remaining) {
				// Start position
				$start = $i;

				// By default we are valid
				$valid = true;

				// One byte sequence:
				if ($value <= 0x7F) {
					$character = $value;
					$length = 1;
				}
				// Two byte sequence:
				elseif (($value & 0xE0) === 0xC0) {
					$character = ($value & 0x1F) << 6;
					$length = 2;
					$remaining = 1;
				}
				// Three byte sequence:
				elseif (($value & 0xF0) === 0xE0) {
					$character = ($value & 0x0F) << 12;
					$length = 3;
					$remaining = 2;
				}
				// Four byte sequence:
				elseif (($value & 0xF8) === 0xF0) {
					$character = ($value & 0x07) << 18;
					$length = 4;
					$remaining = 3;
				}
				// Invalid byte:
				// ($length and $character are not assigned here; the !$valid
				// test below short-circuits before either is read.)
				else {
					$valid = false;
					$remaining = 0;
				}
			}
			// Continuation byte:
			else {
				// Check that the byte is valid, then add it to the character:
				if (($value & 0xC0) === 0x80) {
					$remaining--;
					$character |= ($value & 0x3F) << ($remaining * 6);
				}
				// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
				else {
					$valid = false;
					$remaining = 0;
					$i--;
				}
			}

			// If we've reached the end of the current byte sequence, append it
			// to the result string
			if (!$remaining) {
				// Percent encode anything invalid or not in iunreserved
				if (
					// Invalid sequences
					!$valid
					// Non-shortest form sequences are invalid
					|| $length > 1 && $character <= 0x7F
					|| $length > 2 && $character <= 0x7FF
					|| $length > 3 && $character <= 0xFFFF
					// Outside of range of iunreserved codepoints
					|| $character < 0x2D
					|| $character > 0xEFFFD
					// Noncharacters
					|| ($character & 0xFFFE) === 0xFFFE
					|| $character >= 0xFDD0 && $character <= 0xFDEF
					// Everything else not in iunreserved (this is all BMP)
					|| $character === 0x2F
					|| $character > 0x39 && $character < 0x41
					|| $character > 0x5A && $character < 0x61
					|| $character > 0x7A && $character < 0x7E
					|| $character > 0x7E && $character < 0xA0
					|| $character > 0xD7FF && $character < 0xF900
				) {
					// Keep the sequence escaped, normalising the hex digits to
					// upper case.
					for ($j = $start; $j <= $i; $j++) {
						$string .= '%' . strtoupper($bytes[$j]);
					}
				}
				else {
					// Acceptable character: emit the raw bytes.
					for ($j = $start; $j <= $i; $j++) {
						$string .= chr(hexdec($bytes[$j]));
					}
				}
			}
		}

		// If we have any bytes left over they are invalid (i.e., we are
		// mid-way through a multi-byte sequence)
		if ($remaining) {
			for ($j = $start; $j < $len; $j++) {
				$string .= '%' . strtoupper($bytes[$j]);
			}
		}

		return $string;
	}
       
   656 
       
   657 	protected function scheme_normalization() {
       
   658 		if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo']) {
       
   659 			$this->iuserinfo = null;
       
   660 		}
       
   661 		if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost']) {
       
   662 			$this->ihost = null;
       
   663 		}
       
   664 		if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port']) {
       
   665 			$this->port = null;
       
   666 		}
       
   667 		if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath']) {
       
   668 			$this->ipath = '';
       
   669 		}
       
   670 		if (isset($this->ihost) && empty($this->ipath)) {
       
   671 			$this->ipath = '/';
       
   672 		}
       
   673 		if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery']) {
       
   674 			$this->iquery = null;
       
   675 		}
       
   676 		if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment']) {
       
   677 			$this->ifragment = null;
       
   678 		}
       
   679 	}
       
   680 
       
   681 	/**
       
   682 	 * Check if the object represents a valid IRI. This needs to be done on each
       
   683 	 * call as some things change depending on another part of the IRI.
       
   684 	 *
       
   685 	 * @return bool
       
   686 	 */
       
   687 	public function is_valid() {
       
   688 		$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
       
   689 		if ($this->ipath !== '' &&
       
   690 			(
       
   691 				$isauthority && $this->ipath[0] !== '/' ||
       
   692 				(
       
   693 					$this->scheme === null &&
       
   694 					!$isauthority &&
       
   695 					strpos($this->ipath, ':') !== false &&
       
   696 					(strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/'))
       
   697 				)
       
   698 			)
       
   699 		) {
       
   700 			return false;
       
   701 		}
       
   702 
       
   703 		return true;
       
   704 	}
       
   705 
       
   706 	/**
       
   707 	 * Set the entire IRI. Returns true on success, false on failure (if there
       
   708 	 * are any invalid characters).
       
   709 	 *
       
   710 	 * @param string $iri
       
   711 	 * @return bool
       
   712 	 */
       
   713 	protected function set_iri($iri) {
       
   714 		static $cache;
       
   715 		if (!$cache) {
       
   716 			$cache = array();
       
   717 		}
       
   718 
       
   719 		if ($iri === null) {
       
   720 			return true;
       
   721 		}
       
   722 		if (isset($cache[$iri])) {
       
   723 			list($this->scheme,
       
   724 				 $this->iuserinfo,
       
   725 				 $this->ihost,
       
   726 				 $this->port,
       
   727 				 $this->ipath,
       
   728 				 $this->iquery,
       
   729 				 $this->ifragment,
       
   730 				 $return) = $cache[$iri];
       
   731 			return $return;
       
   732 		}
       
   733 
       
   734 		$parsed = $this->parse_iri((string) $iri);
       
   735 
       
   736 		$return = $this->set_scheme($parsed['scheme'])
       
   737 			&& $this->set_authority($parsed['authority'])
       
   738 			&& $this->set_path($parsed['path'])
       
   739 			&& $this->set_query($parsed['query'])
       
   740 			&& $this->set_fragment($parsed['fragment']);
       
   741 
       
   742 		$cache[$iri] = array($this->scheme,
       
   743 							 $this->iuserinfo,
       
   744 							 $this->ihost,
       
   745 							 $this->port,
       
   746 							 $this->ipath,
       
   747 							 $this->iquery,
       
   748 							 $this->ifragment,
       
   749 							 $return);
       
   750 		return $return;
       
   751 	}
       
   752 
       
   753 	/**
       
   754 	 * Set the scheme. Returns true on success, false on failure (if there are
       
   755 	 * any invalid characters).
       
   756 	 *
       
   757 	 * @param string $scheme
       
   758 	 * @return bool
       
   759 	 */
       
   760 	protected function set_scheme($scheme) {
       
   761 		if ($scheme === null) {
       
   762 			$this->scheme = null;
       
   763 		}
       
   764 		elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme)) {
       
   765 			$this->scheme = null;
       
   766 			return false;
       
   767 		}
       
   768 		else {
       
   769 			$this->scheme = strtolower($scheme);
       
   770 		}
       
   771 		return true;
       
   772 	}
       
   773 
       
   774 	/**
       
   775 	 * Set the authority. Returns true on success, false on failure (if there are
       
   776 	 * any invalid characters).
       
   777 	 *
       
   778 	 * @param string $authority
       
   779 	 * @return bool
       
   780 	 */
       
   781 	protected function set_authority($authority) {
       
   782 		static $cache;
       
   783 		if (!$cache) {
       
   784 			$cache = array();
       
   785 		}
       
   786 
       
   787 		if ($authority === null) {
       
   788 			$this->iuserinfo = null;
       
   789 			$this->ihost = null;
       
   790 			$this->port = null;
       
   791 			return true;
       
   792 		}
       
   793 		if (isset($cache[$authority])) {
       
   794 			list($this->iuserinfo,
       
   795 				 $this->ihost,
       
   796 				 $this->port,
       
   797 				 $return) = $cache[$authority];
       
   798 
       
   799 			return $return;
       
   800 		}
       
   801 
       
   802 		$remaining = $authority;
       
   803 		if (($iuserinfo_end = strrpos($remaining, '@')) !== false) {
       
   804 			$iuserinfo = substr($remaining, 0, $iuserinfo_end);
       
   805 			$remaining = substr($remaining, $iuserinfo_end + 1);
       
   806 		}
       
   807 		else {
       
   808 			$iuserinfo = null;
       
   809 		}
       
   810 		if (($port_start = strpos($remaining, ':', strpos($remaining, ']'))) !== false) {
       
   811 			$port = substr($remaining, $port_start + 1);
       
   812 			if ($port === false || $port === '') {
       
   813 				$port = null;
       
   814 			}
       
   815 			$remaining = substr($remaining, 0, $port_start);
       
   816 		}
       
   817 		else {
       
   818 			$port = null;
       
   819 		}
       
   820 
       
   821 		$return = $this->set_userinfo($iuserinfo) &&
       
   822 				  $this->set_host($remaining) &&
       
   823 				  $this->set_port($port);
       
   824 
       
   825 		$cache[$authority] = array($this->iuserinfo,
       
   826 								   $this->ihost,
       
   827 								   $this->port,
       
   828 								   $return);
       
   829 
       
   830 		return $return;
       
   831 	}
       
   832 
       
   833 	/**
       
   834 	 * Set the iuserinfo.
       
   835 	 *
       
   836 	 * @param string $iuserinfo
       
   837 	 * @return bool
       
   838 	 */
       
   839 	protected function set_userinfo($iuserinfo) {
       
   840 		if ($iuserinfo === null) {
       
   841 			$this->iuserinfo = null;
       
   842 		}
       
   843 		else {
       
   844 			$this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
       
   845 			$this->scheme_normalization();
       
   846 		}
       
   847 
       
   848 		return true;
       
   849 	}
       
   850 
       
   851 	/**
       
   852 	 * Set the ihost. Returns true on success, false on failure (if there are
       
   853 	 * any invalid characters).
       
   854 	 *
       
   855 	 * @param string $ihost
       
   856 	 * @return bool
       
   857 	 */
       
   858 	protected function set_host($ihost) {
       
   859 		if ($ihost === null) {
       
   860 			$this->ihost = null;
       
   861 			return true;
       
   862 		}
       
   863 		if (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']') {
       
   864 			if (Requests_IPv6::check_ipv6(substr($ihost, 1, -1))) {
       
   865 				$this->ihost = '[' . Requests_IPv6::compress(substr($ihost, 1, -1)) . ']';
       
   866 			}
       
   867 			else {
       
   868 				$this->ihost = null;
       
   869 				return false;
       
   870 			}
       
   871 		}
       
   872 		else {
       
   873 			$ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
       
   874 
       
   875 			// Lowercase, but ignore pct-encoded sections (as they should
       
   876 			// remain uppercase). This must be done after the previous step
       
   877 			// as that can add unescaped characters.
       
   878 			$position = 0;
       
   879 			$strlen = strlen($ihost);
       
   880 			while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen) {
       
   881 				if ($ihost[$position] === '%') {
       
   882 					$position += 3;
       
   883 				}
       
   884 				else {
       
   885 					$ihost[$position] = strtolower($ihost[$position]);
       
   886 					$position++;
       
   887 				}
       
   888 			}
       
   889 
       
   890 			$this->ihost = $ihost;
       
   891 		}
       
   892 
       
   893 		$this->scheme_normalization();
       
   894 
       
   895 		return true;
       
   896 	}
       
   897 
       
   898 	/**
       
   899 	 * Set the port. Returns true on success, false on failure (if there are
       
   900 	 * any invalid characters).
       
   901 	 *
       
   902 	 * @param string $port
       
   903 	 * @return bool
       
   904 	 */
       
   905 	protected function set_port($port) {
       
   906 		if ($port === null) {
       
   907 			$this->port = null;
       
   908 			return true;
       
   909 		}
       
   910 
       
   911 		if (strspn($port, '0123456789') === strlen($port)) {
       
   912 			$this->port = (int) $port;
       
   913 			$this->scheme_normalization();
       
   914 			return true;
       
   915 		}
       
   916 
       
   917 		$this->port = null;
       
   918 		return false;
       
   919 	}
       
   920 
       
   921 	/**
       
   922 	 * Set the ipath.
       
   923 	 *
       
   924 	 * @param string $ipath
       
   925 	 * @return bool
       
   926 	 */
       
   927 	protected function set_path($ipath) {
       
   928 		static $cache;
       
   929 		if (!$cache) {
       
   930 			$cache = array();
       
   931 		}
       
   932 
       
   933 		$ipath = (string) $ipath;
       
   934 
       
   935 		if (isset($cache[$ipath])) {
       
   936 			$this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
       
   937 		}
       
   938 		else {
       
   939 			$valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
       
   940 			$removed = $this->remove_dot_segments($valid);
       
   941 
       
   942 			$cache[$ipath] = array($valid, $removed);
       
   943 			$this->ipath = ($this->scheme !== null) ? $removed : $valid;
       
   944 		}
       
   945 		$this->scheme_normalization();
       
   946 		return true;
       
   947 	}
       
   948 
       
   949 	/**
       
   950 	 * Set the iquery.
       
   951 	 *
       
   952 	 * @param string $iquery
       
   953 	 * @return bool
       
   954 	 */
       
   955 	protected function set_query($iquery) {
       
   956 		if ($iquery === null) {
       
   957 			$this->iquery = null;
       
   958 		}
       
   959 		else {
       
   960 			$this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
       
   961 			$this->scheme_normalization();
       
   962 		}
       
   963 		return true;
       
   964 	}
       
   965 
       
   966 	/**
       
   967 	 * Set the ifragment.
       
   968 	 *
       
   969 	 * @param string $ifragment
       
   970 	 * @return bool
       
   971 	 */
       
   972 	protected function set_fragment($ifragment) {
       
   973 		if ($ifragment === null) {
       
   974 			$this->ifragment = null;
       
   975 		}
       
   976 		else {
       
   977 			$this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
       
   978 			$this->scheme_normalization();
       
   979 		}
       
   980 		return true;
       
   981 	}
       
   982 
       
   983 	/**
       
   984 	 * Convert an IRI to a URI (or parts thereof)
       
   985 	 *
       
   986 	 * @param string|bool IRI to convert (or false from {@see get_iri})
       
   987 	 * @return string|false URI if IRI is valid, false otherwise.
       
   988 	 */
       
   989 	protected function to_uri($string) {
       
   990 		if (!is_string($string)) {
       
   991 			return false;
       
   992 		}
       
   993 
       
   994 		static $non_ascii;
       
   995 		if (!$non_ascii) {
       
   996 			$non_ascii = implode('', range("\x80", "\xFF"));
       
   997 		}
       
   998 
       
   999 		$position = 0;
       
  1000 		$strlen = strlen($string);
       
  1001 		while (($position += strcspn($string, $non_ascii, $position)) < $strlen) {
       
  1002 			$string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
       
  1003 			$position += 3;
       
  1004 			$strlen += 2;
       
  1005 		}
       
  1006 
       
  1007 		return $string;
       
  1008 	}
       
  1009 
       
  1010 	/**
       
  1011 	 * Get the complete IRI
       
  1012 	 *
       
  1013 	 * @return string
       
  1014 	 */
       
  1015 	protected function get_iri() {
       
  1016 		if (!$this->is_valid()) {
       
  1017 			return false;
       
  1018 		}
       
  1019 
       
  1020 		$iri = '';
       
  1021 		if ($this->scheme !== null) {
       
  1022 			$iri .= $this->scheme . ':';
       
  1023 		}
       
  1024 		if (($iauthority = $this->get_iauthority()) !== null) {
       
  1025 			$iri .= '//' . $iauthority;
       
  1026 		}
       
  1027 		$iri .= $this->ipath;
       
  1028 		if ($this->iquery !== null) {
       
  1029 			$iri .= '?' . $this->iquery;
       
  1030 		}
       
  1031 		if ($this->ifragment !== null) {
       
  1032 			$iri .= '#' . $this->ifragment;
       
  1033 		}
       
  1034 
       
  1035 		return $iri;
       
  1036 	}
       
  1037 
       
  1038 	/**
       
  1039 	 * Get the complete URI
       
  1040 	 *
       
  1041 	 * @return string
       
  1042 	 */
       
  1043 	protected function get_uri() {
       
  1044 		return $this->to_uri($this->get_iri());
       
  1045 	}
       
  1046 
       
  1047 	/**
       
  1048 	 * Get the complete iauthority
       
  1049 	 *
       
  1050 	 * @return string
       
  1051 	 */
       
  1052 	protected function get_iauthority() {
       
  1053 		if ($this->iuserinfo === null && $this->ihost === null && $this->port === null) {
       
  1054 			return null;
       
  1055 		}
       
  1056 
       
  1057 		$iauthority = '';
       
  1058 		if ($this->iuserinfo !== null) {
       
  1059 			$iauthority .= $this->iuserinfo . '@';
       
  1060 		}
       
  1061 		if ($this->ihost !== null) {
       
  1062 			$iauthority .= $this->ihost;
       
  1063 		}
       
  1064 		if ($this->port !== null) {
       
  1065 			$iauthority .= ':' . $this->port;
       
  1066 		}
       
  1067 		return $iauthority;
       
  1068 	}
       
  1069 
       
  1070 	/**
       
  1071 	 * Get the complete authority
       
  1072 	 *
       
  1073 	 * @return string
       
  1074 	 */
       
  1075 	protected function get_authority() {
       
  1076 		$iauthority = $this->get_iauthority();
       
  1077 		if (is_string($iauthority)) {
       
  1078 			return $this->to_uri($iauthority);
       
  1079 		}
       
  1080 		else {
       
  1081 			return $iauthority;
       
  1082 		}
       
  1083 	}
       
  1084 }