wp/wp-includes/SimplePie/IRI.php
changeset 0 d970ebf37754
child 16 a86126ab1dd4
equal deleted inserted replaced
-1:000000000000 0:d970ebf37754
       
     1 <?php
       
     2 /**
       
     3  * SimplePie
       
     4  *
       
     5  * A PHP-Based RSS and Atom Feed Framework.
       
     6  * Takes the hard work out of managing a complete RSS/Atom solution.
       
     7  *
       
     8  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
       
     9  * All rights reserved.
       
    10  *
       
    11  * Redistribution and use in source and binary forms, with or without modification, are
       
    12  * permitted provided that the following conditions are met:
       
    13  *
       
    14  * 	* Redistributions of source code must retain the above copyright notice, this list of
       
    15  * 	  conditions and the following disclaimer.
       
    16  *
       
    17  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
       
    18  * 	  of conditions and the following disclaimer in the documentation and/or other materials
       
    19  * 	  provided with the distribution.
       
    20  *
       
    21  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
       
    22  * 	  to endorse or promote products derived from this software without specific prior
       
    23  * 	  written permission.
       
    24  *
       
    25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
       
    26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
       
    28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
       
    32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    33  * POSSIBILITY OF SUCH DAMAGE.
       
    34  *
       
    35  * @package SimplePie
       
    36  * @version 1.3.1
       
    37  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
       
    38  * @author Ryan Parman
       
    39  * @author Geoffrey Sneddon
       
    40  * @author Ryan McCue
       
    41  * @link http://simplepie.org/ SimplePie
       
    42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
       
    43  */
       
    44 
       
    45 /**
       
    46  * IRI parser/serialiser/normaliser
       
    47  *
       
    48  * @package SimplePie
       
    49  * @subpackage HTTP
       
    50  * @author Geoffrey Sneddon
       
    51  * @author Steve Minutillo
       
    52  * @author Ryan McCue
       
    53  * @copyright 2007-2012 Geoffrey Sneddon, Steve Minutillo, Ryan McCue
       
    54  * @license http://www.opensource.org/licenses/bsd-license.php
       
    55  */
       
    56 class SimplePie_IRI
       
    57 {
       
    58 	/**
       
    59 	 * Scheme
       
    60 	 *
       
    61 	 * @var string
       
    62 	 */
       
    63 	protected $scheme = null;
       
    64 
       
    65 	/**
       
    66 	 * User Information
       
    67 	 *
       
    68 	 * @var string
       
    69 	 */
       
    70 	protected $iuserinfo = null;
       
    71 
       
    72 	/**
       
    73 	 * ihost
       
    74 	 *
       
    75 	 * @var string
       
    76 	 */
       
    77 	protected $ihost = null;
       
    78 
       
    79 	/**
       
    80 	 * Port
       
    81 	 *
       
    82 	 * @var string
       
    83 	 */
       
    84 	protected $port = null;
       
    85 
       
    86 	/**
       
    87 	 * ipath
       
    88 	 *
       
    89 	 * @var string
       
    90 	 */
       
    91 	protected $ipath = '';
       
    92 
       
    93 	/**
       
    94 	 * iquery
       
    95 	 *
       
    96 	 * @var string
       
    97 	 */
       
    98 	protected $iquery = null;
       
    99 
       
   100 	/**
       
   101 	 * ifragment
       
   102 	 *
       
   103 	 * @var string
       
   104 	 */
       
   105 	protected $ifragment = null;
       
   106 
       
   107 	/**
       
   108 	 * Normalization database
       
   109 	 *
       
   110 	 * Each key is the scheme, each value is an array with each key as the IRI
       
   111 	 * part and value as the default value for that part.
       
   112 	 */
       
   113 	protected $normalization = array(
       
   114 		'acap' => array(
       
   115 			'port' => 674
       
   116 		),
       
   117 		'dict' => array(
       
   118 			'port' => 2628
       
   119 		),
       
   120 		'file' => array(
       
   121 			'ihost' => 'localhost'
       
   122 		),
       
   123 		'http' => array(
       
   124 			'port' => 80,
       
   125 			'ipath' => '/'
       
   126 		),
       
   127 		'https' => array(
       
   128 			'port' => 443,
       
   129 			'ipath' => '/'
       
   130 		),
       
   131 	);
       
   132 
       
   133 	/**
       
   134 	 * Return the entire IRI when you try and read the object as a string
       
   135 	 *
       
   136 	 * @return string
       
   137 	 */
       
   138 	public function __toString()
       
   139 	{
       
   140 		return $this->get_iri();
       
   141 	}
       
   142 
       
   143 	/**
       
   144 	 * Overload __set() to provide access via properties
       
   145 	 *
       
   146 	 * @param string $name Property name
       
   147 	 * @param mixed $value Property value
       
   148 	 */
       
   149 	public function __set($name, $value)
       
   150 	{
       
   151 		if (method_exists($this, 'set_' . $name))
       
   152 		{
       
   153 			call_user_func(array($this, 'set_' . $name), $value);
       
   154 		}
       
   155 		elseif (
       
   156 			   $name === 'iauthority'
       
   157 			|| $name === 'iuserinfo'
       
   158 			|| $name === 'ihost'
       
   159 			|| $name === 'ipath'
       
   160 			|| $name === 'iquery'
       
   161 			|| $name === 'ifragment'
       
   162 		)
       
   163 		{
       
   164 			call_user_func(array($this, 'set_' . substr($name, 1)), $value);
       
   165 		}
       
   166 	}
       
   167 
       
   168 	/**
       
   169 	 * Overload __get() to provide access via properties
       
   170 	 *
       
   171 	 * @param string $name Property name
       
   172 	 * @return mixed
       
   173 	 */
       
   174 	public function __get($name)
       
   175 	{
       
   176 		// isset() returns false for null, we don't want to do that
       
   177 		// Also why we use array_key_exists below instead of isset()
       
   178 		$props = get_object_vars($this);
       
   179 
       
   180 		if (
       
   181 			$name === 'iri' ||
       
   182 			$name === 'uri' ||
       
   183 			$name === 'iauthority' ||
       
   184 			$name === 'authority'
       
   185 		)
       
   186 		{
       
   187 			$return = $this->{"get_$name"}();
       
   188 		}
       
   189 		elseif (array_key_exists($name, $props))
       
   190 		{
       
   191 			$return = $this->$name;
       
   192 		}
       
   193 		// host -> ihost
       
   194 		elseif (($prop = 'i' . $name) && array_key_exists($prop, $props))
       
   195 		{
       
   196 			$name = $prop;
       
   197 			$return = $this->$prop;
       
   198 		}
       
   199 		// ischeme -> scheme
       
   200 		elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props))
       
   201 		{
       
   202 			$name = $prop;
       
   203 			$return = $this->$prop;
       
   204 		}
       
   205 		else
       
   206 		{
       
   207 			trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
       
   208 			$return = null;
       
   209 		}
       
   210 
       
   211 		if ($return === null && isset($this->normalization[$this->scheme][$name]))
       
   212 		{
       
   213 			return $this->normalization[$this->scheme][$name];
       
   214 		}
       
   215 		else
       
   216 		{
       
   217 			return $return;
       
   218 		}
       
   219 	}
       
   220 
       
   221 	/**
       
   222 	 * Overload __isset() to provide access via properties
       
   223 	 *
       
   224 	 * @param string $name Property name
       
   225 	 * @return bool
       
   226 	 */
       
   227 	public function __isset($name)
       
   228 	{
       
   229 		if (method_exists($this, 'get_' . $name) || isset($this->$name))
       
   230 		{
       
   231 			return true;
       
   232 		}
       
   233 		else
       
   234 		{
       
   235 			return false;
       
   236 		}
       
   237 	}
       
   238 
       
   239 	/**
       
   240 	 * Overload __unset() to provide access via properties
       
   241 	 *
       
   242 	 * @param string $name Property name
       
   243 	 */
       
   244 	public function __unset($name)
       
   245 	{
       
   246 		if (method_exists($this, 'set_' . $name))
       
   247 		{
       
   248 			call_user_func(array($this, 'set_' . $name), '');
       
   249 		}
       
   250 	}
       
   251 
       
   252 	/**
       
   253 	 * Create a new IRI object, from a specified string
       
   254 	 *
       
   255 	 * @param string $iri
       
   256 	 */
       
	public function __construct($iri = null)
	{
		// All parsing is delegated to set_iri(); with the default null
		// argument set_iri() returns immediately and every component keeps
		// its declared default.
		$this->set_iri($iri);
	}
       
   261 
       
   262 	/**
       
   263 	 * Create a new IRI object by resolving a relative IRI
       
   264 	 *
       
   265 	 * Returns false if $relative is invalid or if $base is not a valid absolute IRI; otherwise returns the resolved IRI.
       
   266 	 *
       
   267 	 * @param IRI|string $base (Absolute) Base IRI
       
   268 	 * @param IRI|string $relative Relative IRI
       
   269 	 * @return IRI|false
       
   270 	 */
       
   271 	public static function absolutize($base, $relative)
       
   272 	{
       
   273 		if (!($relative instanceof SimplePie_IRI))
       
   274 		{
       
   275 			$relative = new SimplePie_IRI($relative);
       
   276 		}
       
   277 		if (!$relative->is_valid())
       
   278 		{
       
   279 			return false;
       
   280 		}
       
   281 		elseif ($relative->scheme !== null)
       
   282 		{
       
   283 			return clone $relative;
       
   284 		}
       
   285 		else
       
   286 		{
       
   287 			if (!($base instanceof SimplePie_IRI))
       
   288 			{
       
   289 				$base = new SimplePie_IRI($base);
       
   290 			}
       
   291 			if ($base->scheme !== null && $base->is_valid())
       
   292 			{
       
   293 				if ($relative->get_iri() !== '')
       
   294 				{
       
   295 					if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null)
       
   296 					{
       
   297 						$target = clone $relative;
       
   298 						$target->scheme = $base->scheme;
       
   299 					}
       
   300 					else
       
   301 					{
       
   302 						$target = new SimplePie_IRI;
       
   303 						$target->scheme = $base->scheme;
       
   304 						$target->iuserinfo = $base->iuserinfo;
       
   305 						$target->ihost = $base->ihost;
       
   306 						$target->port = $base->port;
       
   307 						if ($relative->ipath !== '')
       
   308 						{
       
   309 							if ($relative->ipath[0] === '/')
       
   310 							{
       
   311 								$target->ipath = $relative->ipath;
       
   312 							}
       
   313 							elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '')
       
   314 							{
       
   315 								$target->ipath = '/' . $relative->ipath;
       
   316 							}
       
   317 							elseif (($last_segment = strrpos($base->ipath, '/')) !== false)
       
   318 							{
       
   319 								$target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
       
   320 							}
       
   321 							else
       
   322 							{
       
   323 								$target->ipath = $relative->ipath;
       
   324 							}
       
   325 							$target->ipath = $target->remove_dot_segments($target->ipath);
       
   326 							$target->iquery = $relative->iquery;
       
   327 						}
       
   328 						else
       
   329 						{
       
   330 							$target->ipath = $base->ipath;
       
   331 							if ($relative->iquery !== null)
       
   332 							{
       
   333 								$target->iquery = $relative->iquery;
       
   334 							}
       
   335 							elseif ($base->iquery !== null)
       
   336 							{
       
   337 								$target->iquery = $base->iquery;
       
   338 							}
       
   339 						}
       
   340 						$target->ifragment = $relative->ifragment;
       
   341 					}
       
   342 				}
       
   343 				else
       
   344 				{
       
   345 					$target = clone $base;
       
   346 					$target->ifragment = null;
       
   347 				}
       
   348 				$target->scheme_normalization();
       
   349 				return $target;
       
   350 			}
       
   351 			else
       
   352 			{
       
   353 				return false;
       
   354 			}
       
   355 		}
       
   356 	}
       
   357 
       
   358 	/**
       
   359 	 * Parse an IRI into scheme/authority/path/query/fragment segments
       
   360 	 *
       
   361 	 * @param string $iri
       
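   362 	 * @return array|false Match array with scheme, authority, path, query and fragment keys, or false if the input cannot be parsed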
       
   363 	 */
       
   364 	protected function parse_iri($iri)
       
   365 	{
       
   366 		$iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
       
   367 		if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match))
       
   368 		{
       
   369 			if ($match[1] === '')
       
   370 			{
       
   371 				$match['scheme'] = null;
       
   372 			}
       
   373 			if (!isset($match[3]) || $match[3] === '')
       
   374 			{
       
   375 				$match['authority'] = null;
       
   376 			}
       
   377 			if (!isset($match[5]))
       
   378 			{
       
   379 				$match['path'] = '';
       
   380 			}
       
   381 			if (!isset($match[6]) || $match[6] === '')
       
   382 			{
       
   383 				$match['query'] = null;
       
   384 			}
       
   385 			if (!isset($match[8]) || $match[8] === '')
       
   386 			{
       
   387 				$match['fragment'] = null;
       
   388 			}
       
   389 			return $match;
       
   390 		}
       
   391 		else
       
   392 		{
       
   393 			// This can occur when a paragraph is accidentally parsed as a URI
       
   394 			return false;
       
   395 		}
       
   396 	}
       
   397 
       
   398 	/**
       
   399 	 * Remove dot segments from a path
       
   400 	 *
       
   401 	 * @param string $input
       
   402 	 * @return string
       
   403 	 */
       
   404 	protected function remove_dot_segments($input)
       
   405 	{
       
   406 		$output = '';
       
   407 		while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..')
       
   408 		{
       
   409 			// A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
       
   410 			if (strpos($input, '../') === 0)
       
   411 			{
       
   412 				$input = substr($input, 3);
       
   413 			}
       
   414 			elseif (strpos($input, './') === 0)
       
   415 			{
       
   416 				$input = substr($input, 2);
       
   417 			}
       
   418 			// B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
       
   419 			elseif (strpos($input, '/./') === 0)
       
   420 			{
       
   421 				$input = substr($input, 2);
       
   422 			}
       
   423 			elseif ($input === '/.')
       
   424 			{
       
   425 				$input = '/';
       
   426 			}
       
   427 			// C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
       
   428 			elseif (strpos($input, '/../') === 0)
       
   429 			{
       
   430 				$input = substr($input, 3);
       
   431 				$output = substr_replace($output, '', strrpos($output, '/'));
       
   432 			}
       
   433 			elseif ($input === '/..')
       
   434 			{
       
   435 				$input = '/';
       
   436 				$output = substr_replace($output, '', strrpos($output, '/'));
       
   437 			}
       
   438 			// D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
       
   439 			elseif ($input === '.' || $input === '..')
       
   440 			{
       
   441 				$input = '';
       
   442 			}
       
   443 			// E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
       
   444 			elseif (($pos = strpos($input, '/', 1)) !== false)
       
   445 			{
       
   446 				$output .= substr($input, 0, $pos);
       
   447 				$input = substr_replace($input, '', 0, $pos);
       
   448 			}
       
   449 			else
       
   450 			{
       
   451 				$output .= $input;
       
   452 				$input = '';
       
   453 			}
       
   454 		}
       
   455 		return $output . $input;
       
   456 	}
       
   457 
       
   458 	/**
       
   459 	 * Replace invalid character with percent encoding
       
   460 	 *
       
   461 	 * @param string $string Input string
       
   462 	 * @param string $extra_chars Valid characters not in iunreserved or
       
   463 	 *                            iprivate (this is ASCII-only)
       
   464 	 * @param bool $iprivate Allow iprivate
       
   465 	 * @return string
       
   466 	 */
       
   467 	protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
       
   468 	{
       
   469 		// Normalize as many pct-encoded sections as possible
       
   470 		$string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', array($this, 'remove_iunreserved_percent_encoded'), $string);
       
   471 
       
   472 		// Replace invalid percent characters
       
   473 		$string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);
       
   474 
       
   475 		// Add unreserved and % to $extra_chars (the latter is safe because all
       
   476 		// pct-encoded sections are now valid).
       
   477 		$extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';
       
   478 
       
   479 		// Now replace any bytes that aren't allowed with their pct-encoded versions
       
   480 		$position = 0;
       
   481 		$strlen = strlen($string);
       
   482 		while (($position += strspn($string, $extra_chars, $position)) < $strlen)
       
   483 		{
       
   484 			$value = ord($string[$position]);
       
   485 
       
   486 			// Start position
       
   487 			$start = $position;
       
   488 
       
   489 			// By default we are valid
       
   490 			$valid = true;
       
   491 
       
   492 			// No one byte sequences are valid due to the while.
       
   493 			// Two byte sequence:
       
   494 			if (($value & 0xE0) === 0xC0)
       
   495 			{
       
   496 				$character = ($value & 0x1F) << 6;
       
   497 				$length = 2;
       
   498 				$remaining = 1;
       
   499 			}
       
   500 			// Three byte sequence:
       
   501 			elseif (($value & 0xF0) === 0xE0)
       
   502 			{
       
   503 				$character = ($value & 0x0F) << 12;
       
   504 				$length = 3;
       
   505 				$remaining = 2;
       
   506 			}
       
   507 			// Four byte sequence:
       
   508 			elseif (($value & 0xF8) === 0xF0)
       
   509 			{
       
   510 				$character = ($value & 0x07) << 18;
       
   511 				$length = 4;
       
   512 				$remaining = 3;
       
   513 			}
       
   514 			// Invalid byte:
       
   515 			else
       
   516 			{
       
   517 				$valid = false;
       
   518 				$length = 1;
       
   519 				$remaining = 0;
       
   520 			}
       
   521 
       
   522 			if ($remaining)
       
   523 			{
       
   524 				if ($position + $length <= $strlen)
       
   525 				{
       
   526 					for ($position++; $remaining; $position++)
       
   527 					{
       
   528 						$value = ord($string[$position]);
       
   529 
       
   530 						// Check that the byte is valid, then add it to the character:
       
   531 						if (($value & 0xC0) === 0x80)
       
   532 						{
       
   533 							$character |= ($value & 0x3F) << (--$remaining * 6);
       
   534 						}
       
   535 						// If it is invalid, count the sequence as invalid and reprocess the current byte:
       
   536 						else
       
   537 						{
       
   538 							$valid = false;
       
   539 							$position--;
       
   540 							break;
       
   541 						}
       
   542 					}
       
   543 				}
       
   544 				else
       
   545 				{
       
   546 					$position = $strlen - 1;
       
   547 					$valid = false;
       
   548 				}
       
   549 			}
       
   550 
       
   551 			// Percent encode anything invalid or not in ucschar
       
   552 			if (
       
   553 				// Invalid sequences
       
   554 				!$valid
       
   555 				// Non-shortest form sequences are invalid
       
   556 				|| $length > 1 && $character <= 0x7F
       
   557 				|| $length > 2 && $character <= 0x7FF
       
   558 				|| $length > 3 && $character <= 0xFFFF
       
   559 				// Outside of range of ucschar codepoints
       
   560 				// Noncharacters
       
   561 				|| ($character & 0xFFFE) === 0xFFFE
       
   562 				|| $character >= 0xFDD0 && $character <= 0xFDEF
       
   563 				|| (
       
   564 					// Everything else not in ucschar
       
   565 					   $character > 0xD7FF && $character < 0xF900
       
   566 					|| $character < 0xA0
       
   567 					|| $character > 0xEFFFD
       
   568 				)
       
   569 				&& (
       
   570 					// Everything not in iprivate, if it applies
       
   571 					   !$iprivate
       
   572 					|| $character < 0xE000
       
   573 					|| $character > 0x10FFFD
       
   574 				)
       
   575 			)
       
   576 			{
       
   577 				// If we were a character, pretend we weren't, but rather an error.
       
   578 				if ($valid)
       
   579 					$position--;
       
   580 
       
   581 				for ($j = $start; $j <= $position; $j++)
       
   582 				{
       
   583 					$string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
       
   584 					$j += 2;
       
   585 					$position += 2;
       
   586 					$strlen += 2;
       
   587 				}
       
   588 			}
       
   589 		}
       
   590 
       
   591 		return $string;
       
   592 	}
       
   593 
       
   594 	/**
       
   595 	 * Callback function for preg_replace_callback.
       
   596 	 *
       
   597 	 * Removes sequences of percent encoded bytes that represent UTF-8
       
   598 	 * encoded characters in iunreserved
       
   599 	 *
       
   600 	 * @param array $match PCRE match
       
   601 	 * @return string Replacement
       
   602 	 */
       
   603 	protected function remove_iunreserved_percent_encoded($match)
       
   604 	{
       
   605 		// As we just have valid percent encoded sequences we can just explode
       
   606 		// and ignore the first member of the returned array (an empty string).
       
   607 		$bytes = explode('%', $match[0]);
       
   608 
       
   609 		// Initialize the new string (this is what will be returned) and that
       
   610 		// there are no bytes remaining in the current sequence (unsurprising
       
   611 		// at the first byte!).
       
   612 		$string = '';
       
   613 		$remaining = 0;
       
   614 
       
   615 		// Loop over each and every byte, and set $value to its value
       
   616 		for ($i = 1, $len = count($bytes); $i < $len; $i++)
       
   617 		{
       
   618 			$value = hexdec($bytes[$i]);
       
   619 
       
   620 			// If we're the first byte of sequence:
       
   621 			if (!$remaining)
       
   622 			{
       
   623 				// Start position
       
   624 				$start = $i;
       
   625 
       
   626 				// By default we are valid
       
   627 				$valid = true;
       
   628 
       
   629 				// One byte sequence:
       
   630 				if ($value <= 0x7F)
       
   631 				{
       
   632 					$character = $value;
       
   633 					$length = 1;
       
   634 				}
       
   635 				// Two byte sequence:
       
   636 				elseif (($value & 0xE0) === 0xC0)
       
   637 				{
       
   638 					$character = ($value & 0x1F) << 6;
       
   639 					$length = 2;
       
   640 					$remaining = 1;
       
   641 				}
       
   642 				// Three byte sequence:
       
   643 				elseif (($value & 0xF0) === 0xE0)
       
   644 				{
       
   645 					$character = ($value & 0x0F) << 12;
       
   646 					$length = 3;
       
   647 					$remaining = 2;
       
   648 				}
       
   649 				// Four byte sequence:
       
   650 				elseif (($value & 0xF8) === 0xF0)
       
   651 				{
       
   652 					$character = ($value & 0x07) << 18;
       
   653 					$length = 4;
       
   654 					$remaining = 3;
       
   655 				}
       
   656 				// Invalid byte:
       
   657 				else
       
   658 				{
       
   659 					$valid = false;
       
   660 					$remaining = 0;
       
   661 				}
       
   662 			}
       
   663 			// Continuation byte:
       
   664 			else
       
   665 			{
       
   666 				// Check that the byte is valid, then add it to the character:
       
   667 				if (($value & 0xC0) === 0x80)
       
   668 				{
       
   669 					$remaining--;
       
   670 					$character |= ($value & 0x3F) << ($remaining * 6);
       
   671 				}
       
   672 				// If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
       
   673 				else
       
   674 				{
       
   675 					$valid = false;
       
   676 					$remaining = 0;
       
   677 					$i--;
       
   678 				}
       
   679 			}
       
   680 
       
   681 			// If we've reached the end of the current byte sequence, append it to Unicode::$data
       
   682 			if (!$remaining)
       
   683 			{
       
   684 				// Percent encode anything invalid or not in iunreserved
       
   685 				if (
       
   686 					// Invalid sequences
       
   687 					!$valid
       
   688 					// Non-shortest form sequences are invalid
       
   689 					|| $length > 1 && $character <= 0x7F
       
   690 					|| $length > 2 && $character <= 0x7FF
       
   691 					|| $length > 3 && $character <= 0xFFFF
       
   692 					// Outside of range of iunreserved codepoints
       
   693 					|| $character < 0x2D
       
   694 					|| $character > 0xEFFFD
       
   695 					// Noncharacters
       
   696 					|| ($character & 0xFFFE) === 0xFFFE
       
   697 					|| $character >= 0xFDD0 && $character <= 0xFDEF
       
   698 					// Everything else not in iunreserved (this is all BMP)
       
   699 					|| $character === 0x2F
       
   700 					|| $character > 0x39 && $character < 0x41
       
   701 					|| $character > 0x5A && $character < 0x61
       
   702 					|| $character > 0x7A && $character < 0x7E
       
   703 					|| $character > 0x7E && $character < 0xA0
       
   704 					|| $character > 0xD7FF && $character < 0xF900
       
   705 				)
       
   706 				{
       
   707 					for ($j = $start; $j <= $i; $j++)
       
   708 					{
       
   709 						$string .= '%' . strtoupper($bytes[$j]);
       
   710 					}
       
   711 				}
       
   712 				else
       
   713 				{
       
   714 					for ($j = $start; $j <= $i; $j++)
       
   715 					{
       
   716 						$string .= chr(hexdec($bytes[$j]));
       
   717 					}
       
   718 				}
       
   719 			}
       
   720 		}
       
   721 
       
   722 		// If we have any bytes left over they are invalid (i.e., we are
       
   723 		// mid-way through a multi-byte sequence)
       
   724 		if ($remaining)
       
   725 		{
       
   726 			for ($j = $start; $j < $len; $j++)
       
   727 			{
       
   728 				$string .= '%' . strtoupper($bytes[$j]);
       
   729 			}
       
   730 		}
       
   731 
       
   732 		return $string;
       
   733 	}
       
   734 
       
   735 	protected function scheme_normalization()
       
   736 	{
       
   737 		if (isset($this->normalization[$this->scheme]['iuserinfo']) && $this->iuserinfo === $this->normalization[$this->scheme]['iuserinfo'])
       
   738 		{
       
   739 			$this->iuserinfo = null;
       
   740 		}
       
   741 		if (isset($this->normalization[$this->scheme]['ihost']) && $this->ihost === $this->normalization[$this->scheme]['ihost'])
       
   742 		{
       
   743 			$this->ihost = null;
       
   744 		}
       
   745 		if (isset($this->normalization[$this->scheme]['port']) && $this->port === $this->normalization[$this->scheme]['port'])
       
   746 		{
       
   747 			$this->port = null;
       
   748 		}
       
   749 		if (isset($this->normalization[$this->scheme]['ipath']) && $this->ipath === $this->normalization[$this->scheme]['ipath'])
       
   750 		{
       
   751 			$this->ipath = '';
       
   752 		}
       
   753 		if (isset($this->normalization[$this->scheme]['iquery']) && $this->iquery === $this->normalization[$this->scheme]['iquery'])
       
   754 		{
       
   755 			$this->iquery = null;
       
   756 		}
       
   757 		if (isset($this->normalization[$this->scheme]['ifragment']) && $this->ifragment === $this->normalization[$this->scheme]['ifragment'])
       
   758 		{
       
   759 			$this->ifragment = null;
       
   760 		}
       
   761 	}
       
   762 
       
   763 	/**
       
   764 	 * Check if the object represents a valid IRI. This needs to be done on each
       
   765 	 * call as some things change depending on another part of the IRI.
       
   766 	 *
       
   767 	 * @return bool
       
   768 	 */
       
   769 	public function is_valid()
       
   770 	{
       
   771 		$isauthority = $this->iuserinfo !== null || $this->ihost !== null || $this->port !== null;
       
   772 		if ($this->ipath !== '' &&
       
   773 			(
       
   774 				$isauthority && (
       
   775 					$this->ipath[0] !== '/' ||
       
   776 					substr($this->ipath, 0, 2) === '//'
       
   777 				) ||
       
   778 				(
       
   779 					$this->scheme === null &&
       
   780 					!$isauthority &&
       
   781 					strpos($this->ipath, ':') !== false &&
       
   782 					(strpos($this->ipath, '/') === false ? true : strpos($this->ipath, ':') < strpos($this->ipath, '/'))
       
   783 				)
       
   784 			)
       
   785 		)
       
   786 		{
       
   787 			return false;
       
   788 		}
       
   789 
       
   790 		return true;
       
   791 	}
       
   792 
       
   793 	/**
       
   794 	 * Set the entire IRI. Returns true on success, false on failure (if there
       
   795 	 * are any invalid characters).
       
   796 	 *
       
   797 	 * @param string $iri
       
   798 	 * @return bool
       
   799 	 */
       
   800 	public function set_iri($iri)
       
   801 	{
       
   802 		static $cache;
       
   803 		if (!$cache)
       
   804 		{
       
   805 			$cache = array();
       
   806 		}
       
   807 
       
   808 		if ($iri === null)
       
   809 		{
       
   810 			return true;
       
   811 		}
       
   812 		elseif (isset($cache[$iri]))
       
   813 		{
       
   814 			list($this->scheme,
       
   815 				 $this->iuserinfo,
       
   816 				 $this->ihost,
       
   817 				 $this->port,
       
   818 				 $this->ipath,
       
   819 				 $this->iquery,
       
   820 				 $this->ifragment,
       
   821 				 $return) = $cache[$iri];
       
   822 			return $return;
       
   823 		}
       
   824 		else
       
   825 		{
       
   826 			$parsed = $this->parse_iri((string) $iri);
       
   827 			if (!$parsed)
       
   828 			{
       
   829 				return false;
       
   830 			}
       
   831 
       
   832 			$return = $this->set_scheme($parsed['scheme'])
       
   833 				&& $this->set_authority($parsed['authority'])
       
   834 				&& $this->set_path($parsed['path'])
       
   835 				&& $this->set_query($parsed['query'])
       
   836 				&& $this->set_fragment($parsed['fragment']);
       
   837 
       
   838 			$cache[$iri] = array($this->scheme,
       
   839 								 $this->iuserinfo,
       
   840 								 $this->ihost,
       
   841 								 $this->port,
       
   842 								 $this->ipath,
       
   843 								 $this->iquery,
       
   844 								 $this->ifragment,
       
   845 								 $return);
       
   846 			return $return;
       
   847 		}
       
   848 	}
       
   849 
       
   850 	/**
       
   851 	 * Set the scheme. Returns true on success, false on failure (if there are
       
   852 	 * any invalid characters).
       
   853 	 *
       
   854 	 * @param string $scheme
       
   855 	 * @return bool
       
   856 	 */
       
   857 	public function set_scheme($scheme)
       
   858 	{
       
   859 		if ($scheme === null)
       
   860 		{
       
   861 			$this->scheme = null;
       
   862 		}
       
   863 		elseif (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/', $scheme))
       
   864 		{
       
   865 			$this->scheme = null;
       
   866 			return false;
       
   867 		}
       
   868 		else
       
   869 		{
       
   870 			$this->scheme = strtolower($scheme);
       
   871 		}
       
   872 		return true;
       
   873 	}
       
   874 
       
   875 	/**
       
   876 	 * Set the authority. Returns true on success, false on failure (if there are
       
   877 	 * any invalid characters).
       
   878 	 *
       
   879 	 * @param string $authority
       
   880 	 * @return bool
       
   881 	 */
       
   882 	public function set_authority($authority)
       
   883 	{
       
   884 		static $cache;
       
   885 		if (!$cache)
       
   886 			$cache = array();
       
   887 
       
   888 		if ($authority === null)
       
   889 		{
       
   890 			$this->iuserinfo = null;
       
   891 			$this->ihost = null;
       
   892 			$this->port = null;
       
   893 			return true;
       
   894 		}
       
   895 		elseif (isset($cache[$authority]))
       
   896 		{
       
   897 			list($this->iuserinfo,
       
   898 				 $this->ihost,
       
   899 				 $this->port,
       
   900 				 $return) = $cache[$authority];
       
   901 
       
   902 			return $return;
       
   903 		}
       
   904 		else
       
   905 		{
       
   906 			$remaining = $authority;
       
   907 			if (($iuserinfo_end = strrpos($remaining, '@')) !== false)
       
   908 			{
       
   909 				$iuserinfo = substr($remaining, 0, $iuserinfo_end);
       
   910 				$remaining = substr($remaining, $iuserinfo_end + 1);
       
   911 			}
       
   912 			else
       
   913 			{
       
   914 				$iuserinfo = null;
       
   915 			}
       
   916 			if (($port_start = strpos($remaining, ':', strpos($remaining, ']'))) !== false)
       
   917 			{
       
   918 				if (($port = substr($remaining, $port_start + 1)) === false)
       
   919 				{
       
   920 					$port = null;
       
   921 				}
       
   922 				$remaining = substr($remaining, 0, $port_start);
       
   923 			}
       
   924 			else
       
   925 			{
       
   926 				$port = null;
       
   927 			}
       
   928 
       
   929 			$return = $this->set_userinfo($iuserinfo) &&
       
   930 					  $this->set_host($remaining) &&
       
   931 					  $this->set_port($port);
       
   932 
       
   933 			$cache[$authority] = array($this->iuserinfo,
       
   934 									   $this->ihost,
       
   935 									   $this->port,
       
   936 									   $return);
       
   937 
       
   938 			return $return;
       
   939 		}
       
   940 	}
       
   941 
       
   942 	/**
       
   943 	 * Set the iuserinfo.
       
   944 	 *
       
   945 	 * @param string $iuserinfo
       
   946 	 * @return bool
       
   947 	 */
       
   948 	public function set_userinfo($iuserinfo)
       
   949 	{
       
   950 		if ($iuserinfo === null)
       
   951 		{
       
   952 			$this->iuserinfo = null;
       
   953 		}
       
   954 		else
       
   955 		{
       
   956 			$this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
       
   957 			$this->scheme_normalization();
       
   958 		}
       
   959 
       
   960 		return true;
       
   961 	}
       
   962 
       
   963 	/**
       
   964 	 * Set the ihost. Returns true on success, false on failure (if there are
       
   965 	 * any invalid characters).
       
   966 	 *
       
   967 	 * @param string $ihost
       
   968 	 * @return bool
       
   969 	 */
       
   970 	public function set_host($ihost)
       
   971 	{
       
   972 		if ($ihost === null)
       
   973 		{
       
   974 			$this->ihost = null;
       
   975 			return true;
       
   976 		}
       
   977 		elseif (substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']')
       
   978 		{
       
   979 			if (SimplePie_Net_IPv6::check_ipv6(substr($ihost, 1, -1)))
       
   980 			{
       
   981 				$this->ihost = '[' . SimplePie_Net_IPv6::compress(substr($ihost, 1, -1)) . ']';
       
   982 			}
       
   983 			else
       
   984 			{
       
   985 				$this->ihost = null;
       
   986 				return false;
       
   987 			}
       
   988 		}
       
   989 		else
       
   990 		{
       
   991 			$ihost = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');
       
   992 
       
   993 			// Lowercase, but ignore pct-encoded sections (as they should
       
   994 			// remain uppercase). This must be done after the previous step
       
   995 			// as that can add unescaped characters.
       
   996 			$position = 0;
       
   997 			$strlen = strlen($ihost);
       
   998 			while (($position += strcspn($ihost, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $position)) < $strlen)
       
   999 			{
       
  1000 				if ($ihost[$position] === '%')
       
  1001 				{
       
  1002 					$position += 3;
       
  1003 				}
       
  1004 				else
       
  1005 				{
       
  1006 					$ihost[$position] = strtolower($ihost[$position]);
       
  1007 					$position++;
       
  1008 				}
       
  1009 			}
       
  1010 
       
  1011 			$this->ihost = $ihost;
       
  1012 		}
       
  1013 
       
  1014 		$this->scheme_normalization();
       
  1015 
       
  1016 		return true;
       
  1017 	}
       
  1018 
       
  1019 	/**
       
  1020 	 * Set the port. Returns true on success, false on failure (if there are
       
  1021 	 * any invalid characters).
       
  1022 	 *
       
  1023 	 * @param string $port
       
  1024 	 * @return bool
       
  1025 	 */
       
  1026 	public function set_port($port)
       
  1027 	{
       
  1028 		if ($port === null)
       
  1029 		{
       
  1030 			$this->port = null;
       
  1031 			return true;
       
  1032 		}
       
  1033 		elseif (strspn($port, '0123456789') === strlen($port))
       
  1034 		{
       
  1035 			$this->port = (int) $port;
       
  1036 			$this->scheme_normalization();
       
  1037 			return true;
       
  1038 		}
       
  1039 		else
       
  1040 		{
       
  1041 			$this->port = null;
       
  1042 			return false;
       
  1043 		}
       
  1044 	}
       
  1045 
       
  1046 	/**
       
  1047 	 * Set the ipath.
       
  1048 	 *
       
  1049 	 * @param string $ipath
       
  1050 	 * @return bool
       
  1051 	 */
       
  1052 	public function set_path($ipath)
       
  1053 	{
       
  1054 		static $cache;
       
  1055 		if (!$cache)
       
  1056 		{
       
  1057 			$cache = array();
       
  1058 		}
       
  1059 
       
  1060 		$ipath = (string) $ipath;
       
  1061 
       
  1062 		if (isset($cache[$ipath]))
       
  1063 		{
       
  1064 			$this->ipath = $cache[$ipath][(int) ($this->scheme !== null)];
       
  1065 		}
       
  1066 		else
       
  1067 		{
       
  1068 			$valid = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
       
  1069 			$removed = $this->remove_dot_segments($valid);
       
  1070 
       
  1071 			$cache[$ipath] = array($valid, $removed);
       
  1072 			$this->ipath =  ($this->scheme !== null) ? $removed : $valid;
       
  1073 		}
       
  1074 
       
  1075 		$this->scheme_normalization();
       
  1076 		return true;
       
  1077 	}
       
  1078 
       
  1079 	/**
       
  1080 	 * Set the iquery.
       
  1081 	 *
       
  1082 	 * @param string $iquery
       
  1083 	 * @return bool
       
  1084 	 */
       
  1085 	public function set_query($iquery)
       
  1086 	{
       
  1087 		if ($iquery === null)
       
  1088 		{
       
  1089 			$this->iquery = null;
       
  1090 		}
       
  1091 		else
       
  1092 		{
       
  1093 			$this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
       
  1094 			$this->scheme_normalization();
       
  1095 		}
       
  1096 		return true;
       
  1097 	}
       
  1098 
       
  1099 	/**
       
  1100 	 * Set the ifragment.
       
  1101 	 *
       
  1102 	 * @param string $ifragment
       
  1103 	 * @return bool
       
  1104 	 */
       
  1105 	public function set_fragment($ifragment)
       
  1106 	{
       
  1107 		if ($ifragment === null)
       
  1108 		{
       
  1109 			$this->ifragment = null;
       
  1110 		}
       
  1111 		else
       
  1112 		{
       
  1113 			$this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
       
  1114 			$this->scheme_normalization();
       
  1115 		}
       
  1116 		return true;
       
  1117 	}
       
  1118 
       
  1119 	/**
       
  1120 	 * Convert an IRI to a URI (or parts thereof)
       
  1121 	 *
       
  1122 	 * @return string
       
  1123 	 */
       
  1124 	public function to_uri($string)
       
  1125 	{
       
  1126 		static $non_ascii;
       
  1127 		if (!$non_ascii)
       
  1128 		{
       
  1129 			$non_ascii = implode('', range("\x80", "\xFF"));
       
  1130 		}
       
  1131 
       
  1132 		$position = 0;
       
  1133 		$strlen = strlen($string);
       
  1134 		while (($position += strcspn($string, $non_ascii, $position)) < $strlen)
       
  1135 		{
       
  1136 			$string = substr_replace($string, sprintf('%%%02X', ord($string[$position])), $position, 1);
       
  1137 			$position += 3;
       
  1138 			$strlen += 2;
       
  1139 		}
       
  1140 
       
  1141 		return $string;
       
  1142 	}
       
  1143 
       
  1144 	/**
       
  1145 	 * Get the complete IRI
       
  1146 	 *
       
  1147 	 * @return string
       
  1148 	 */
       
  1149 	public function get_iri()
       
  1150 	{
       
  1151 		if (!$this->is_valid())
       
  1152 		{
       
  1153 			return false;
       
  1154 		}
       
  1155 
       
  1156 		$iri = '';
       
  1157 		if ($this->scheme !== null)
       
  1158 		{
       
  1159 			$iri .= $this->scheme . ':';
       
  1160 		}
       
  1161 		if (($iauthority = $this->get_iauthority()) !== null)
       
  1162 		{
       
  1163 			$iri .= '//' . $iauthority;
       
  1164 		}
       
  1165 		if ($this->ipath !== '')
       
  1166 		{
       
  1167 			$iri .= $this->ipath;
       
  1168 		}
       
  1169 		elseif (!empty($this->normalization[$this->scheme]['ipath']) && $iauthority !== null && $iauthority !== '')
       
  1170 		{
       
  1171 			$iri .= $this->normalization[$this->scheme]['ipath'];
       
  1172 		}
       
  1173 		if ($this->iquery !== null)
       
  1174 		{
       
  1175 			$iri .= '?' . $this->iquery;
       
  1176 		}
       
  1177 		if ($this->ifragment !== null)
       
  1178 		{
       
  1179 			$iri .= '#' . $this->ifragment;
       
  1180 		}
       
  1181 
       
  1182 		return $iri;
       
  1183 	}
       
  1184 
       
  1185 	/**
       
  1186 	 * Get the complete URI
       
  1187 	 *
       
  1188 	 * @return string
       
  1189 	 */
       
  1190 	public function get_uri()
       
  1191 	{
       
  1192 		return $this->to_uri($this->get_iri());
       
  1193 	}
       
  1194 
       
  1195 	/**
       
  1196 	 * Get the complete iauthority
       
  1197 	 *
       
  1198 	 * @return string
       
  1199 	 */
       
  1200 	protected function get_iauthority()
       
  1201 	{
       
  1202 		if ($this->iuserinfo !== null || $this->ihost !== null || $this->port !== null)
       
  1203 		{
       
  1204 			$iauthority = '';
       
  1205 			if ($this->iuserinfo !== null)
       
  1206 			{
       
  1207 				$iauthority .= $this->iuserinfo . '@';
       
  1208 			}
       
  1209 			if ($this->ihost !== null)
       
  1210 			{
       
  1211 				$iauthority .= $this->ihost;
       
  1212 			}
       
  1213 			if ($this->port !== null)
       
  1214 			{
       
  1215 				$iauthority .= ':' . $this->port;
       
  1216 			}
       
  1217 			return $iauthority;
       
  1218 		}
       
  1219 		else
       
  1220 		{
       
  1221 			return null;
       
  1222 		}
       
  1223 	}
       
  1224 
       
  1225 	/**
       
  1226 	 * Get the complete authority
       
  1227 	 *
       
  1228 	 * @return string
       
  1229 	 */
       
  1230 	protected function get_authority()
       
  1231 	{
       
  1232 		$iauthority = $this->get_iauthority();
       
  1233 		if (is_string($iauthority))
       
  1234 			return $this->to_uri($iauthority);
       
  1235 		else
       
  1236 			return $iauthority;
       
  1237 	}
       
  1238 }