wp/wp-includes/SimplePie/src/IRI.php
changeset 22 8c2e4d02f4ef
equal deleted inserted replaced
21:48c4eec2b7e6 22:8c2e4d02f4ef
       
     1 <?php
       
     2 
       
     3 /**
       
     4  * SimplePie
       
     5  *
       
     6  * A PHP-Based RSS and Atom Feed Framework.
       
     7  * Takes the hard work out of managing a complete RSS/Atom solution.
       
     8  *
       
     9  * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
       
    10  * All rights reserved.
       
    11  *
       
    12  * Redistribution and use in source and binary forms, with or without modification, are
       
    13  * permitted provided that the following conditions are met:
       
    14  *
       
    15  * 	* Redistributions of source code must retain the above copyright notice, this list of
       
    16  * 	  conditions and the following disclaimer.
       
    17  *
       
    18  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
       
    19  * 	  of conditions and the following disclaimer in the documentation and/or other materials
       
    20  * 	  provided with the distribution.
       
    21  *
       
    22  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
       
    23  * 	  to endorse or promote products derived from this software without specific prior
       
    24  * 	  written permission.
       
    25  *
       
    26  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
       
    27  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    28  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
       
    29  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    31  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    32  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
       
    33  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    34  * POSSIBILITY OF SUCH DAMAGE.
       
    35  *
       
    36  * @package SimplePie
       
    37  * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
       
    38  * @author Ryan Parman
       
    39  * @author Sam Sneddon
       
    40  * @author Ryan McCue
       
    41  * @link http://simplepie.org/ SimplePie
       
    42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
       
    43  */
       
    44 
       
    45 namespace SimplePie;
       
    46 
       
    47 /**
       
    48  * IRI parser/serialiser/normaliser
       
    49  *
       
    50  * @package SimplePie
       
    51  * @subpackage HTTP
       
    52  * @author Sam Sneddon
       
    53  * @author Steve Minutillo
       
    54  * @author Ryan McCue
       
    55  * @copyright 2007-2012 Sam Sneddon, Steve Minutillo, Ryan McCue
       
    56  * @license http://www.opensource.org/licenses/bsd-license.php
       
    57  */
       
    58 class IRI
       
    59 {
       
    60     /**
       
    61      * Scheme
       
    62      *
       
     63      * @var string|null
       
    64      */
       
    65     protected $scheme = null;
       
    66 
       
    67     /**
       
    68      * User Information
       
    69      *
       
     70      * @var string|null
       
    71      */
       
    72     protected $iuserinfo = null;
       
    73 
       
    74     /**
       
    75      * ihost
       
    76      *
       
     77      * @var string|null
       
    78      */
       
    79     protected $ihost = null;
       
    80 
       
    81     /**
       
    82      * Port
       
    83      *
       
     84      * @var int|null
       
    85      */
       
    86     protected $port = null;
       
    87 
       
    88     /**
       
    89      * ipath
       
    90      *
       
    91      * @var string
       
    92      */
       
    93     protected $ipath = '';
       
    94 
       
    95     /**
       
    96      * iquery
       
    97      *
       
     98      * @var string|null
       
    99      */
       
   100     protected $iquery = null;
       
   101 
       
   102     /**
       
   103      * ifragment
       
   104      *
       
    105      * @var string|null
       
   106      */
       
   107     protected $ifragment = null;
       
   108 
       
   109     /**
       
   110      * Normalization database
       
   111      *
       
   112      * Each key is the scheme, each value is an array with each key as the IRI
       
   113      * part and value as the default value for that part.
       
   114      */
       
   115     protected $normalization = [
       
   116         'acap' => [
       
   117             'port' => 674
       
   118         ],
       
   119         'dict' => [
       
   120             'port' => 2628
       
   121         ],
       
   122         'file' => [
       
   123             'ihost' => 'localhost'
       
   124         ],
       
   125         'http' => [
       
   126             'port' => 80,
       
   127             'ipath' => '/'
       
   128         ],
       
   129         'https' => [
       
   130             'port' => 443,
       
   131             'ipath' => '/'
       
   132         ],
       
   133     ];
       
   134 
       
   135     /**
       
   136      * Return the entire IRI when you try and read the object as a string
       
   137      *
       
   138      * @return string
       
   139      */
       
   140     public function __toString()
       
   141     {
       
   142         return $this->get_iri();
       
   143     }
       
   144 
       
   145     /**
       
   146      * Overload __set() to provide access via properties
       
   147      *
       
   148      * @param string $name Property name
       
   149      * @param mixed $value Property value
       
   150      */
       
   151     public function __set($name, $value)
       
   152     {
       
   153         if (method_exists($this, 'set_' . $name)) {
       
   154             call_user_func([$this, 'set_' . $name], $value);
       
   155         } elseif (
       
   156             $name === 'iauthority'
       
   157             || $name === 'iuserinfo'
       
   158             || $name === 'ihost'
       
   159             || $name === 'ipath'
       
   160             || $name === 'iquery'
       
   161             || $name === 'ifragment'
       
   162         ) {
       
   163             call_user_func([$this, 'set_' . substr($name, 1)], $value);
       
   164         }
       
   165     }
       
   166 
       
   167     /**
       
   168      * Overload __get() to provide access via properties
       
   169      *
       
   170      * @param string $name Property name
       
   171      * @return mixed
       
   172      */
       
   173     public function __get($name)
       
   174     {
       
   175         // isset() returns false for null, we don't want to do that
       
   176         // Also why we use array_key_exists below instead of isset()
       
   177         $props = get_object_vars($this);
       
   178 
       
   179         if (
       
   180             $name === 'iri' ||
       
   181             $name === 'uri' ||
       
   182             $name === 'iauthority' ||
       
   183             $name === 'authority'
       
   184         ) {
       
   185             $return = $this->{"get_$name"}();
       
   186         } elseif (array_key_exists($name, $props)) {
       
   187             $return = $this->$name;
       
   188         }
       
   189         // host -> ihost
       
   190         elseif (($prop = 'i' . $name) && array_key_exists($prop, $props)) {
       
   191             $name = $prop;
       
   192             $return = $this->$prop;
       
   193         }
       
   194         // ischeme -> scheme
       
   195         elseif (($prop = substr($name, 1)) && array_key_exists($prop, $props)) {
       
   196             $name = $prop;
       
   197             $return = $this->$prop;
       
   198         } else {
       
   199             trigger_error('Undefined property: ' . get_class($this) . '::' . $name, E_USER_NOTICE);
       
   200             $return = null;
       
   201         }
       
   202 
       
   203         if ($return === null && isset($this->normalization[$this->scheme][$name])) {
       
   204             return $this->normalization[$this->scheme][$name];
       
   205         }
       
   206 
       
   207         return $return;
       
   208     }
       
   209 
       
   210     /**
       
   211      * Overload __isset() to provide access via properties
       
   212      *
       
   213      * @param string $name Property name
       
   214      * @return bool
       
   215      */
       
   216     public function __isset($name)
       
   217     {
       
   218         return method_exists($this, 'get_' . $name) || isset($this->$name);
       
   219     }
       
   220 
       
   221     /**
       
   222      * Overload __unset() to provide access via properties
       
   223      *
       
   224      * @param string $name Property name
       
   225      */
       
   226     public function __unset($name)
       
   227     {
       
   228         if (method_exists($this, 'set_' . $name)) {
       
   229             call_user_func([$this, 'set_' . $name], '');
       
   230         }
       
   231     }
       
   232 
       
   233     /**
       
   234      * Create a new IRI object, from a specified string
       
   235      *
       
   236      * @param string $iri
       
   237      */
       
   238     public function __construct($iri = null)
       
   239     {
       
   240         $this->set_iri($iri);
       
   241     }
       
   242 
       
   243     /**
       
   244      * Clean up
       
   245      */
       
   246     public function __destruct()
       
   247     {
       
   248         $this->set_iri(null, true);
       
   249         $this->set_path(null, true);
       
   250         $this->set_authority(null, true);
       
   251     }
       
   252 
       
   253     /**
       
   254      * Create a new IRI object by resolving a relative IRI
       
   255      *
       
   256      * Returns false if $base is not absolute, otherwise an IRI.
       
   257      *
       
   258      * @param IRI|string $base (Absolute) Base IRI
       
   259      * @param IRI|string $relative Relative IRI
       
   260      * @return IRI|false
       
   261      */
       
   262     public static function absolutize($base, $relative)
       
   263     {
       
   264         if (!($relative instanceof IRI)) {
       
   265             $relative = new IRI($relative);
       
   266         }
       
   267         if (!$relative->is_valid()) {
       
   268             return false;
       
   269         } elseif ($relative->scheme !== null) {
       
   270             return clone $relative;
       
   271         } else {
       
   272             if (!($base instanceof IRI)) {
       
   273                 $base = new IRI($base);
       
   274             }
       
   275             if ($base->scheme !== null && $base->is_valid()) {
       
   276                 if ($relative->get_iri() !== '') {
       
   277                     if ($relative->iuserinfo !== null || $relative->ihost !== null || $relative->port !== null) {
       
   278                         $target = clone $relative;
       
   279                         $target->scheme = $base->scheme;
       
   280                     } else {
       
   281                         $target = new IRI();
       
   282                         $target->scheme = $base->scheme;
       
   283                         $target->iuserinfo = $base->iuserinfo;
       
   284                         $target->ihost = $base->ihost;
       
   285                         $target->port = $base->port;
       
   286                         if ($relative->ipath !== '') {
       
   287                             if ($relative->ipath[0] === '/') {
       
   288                                 $target->ipath = $relative->ipath;
       
   289                             } elseif (($base->iuserinfo !== null || $base->ihost !== null || $base->port !== null) && $base->ipath === '') {
       
   290                                 $target->ipath = '/' . $relative->ipath;
       
   291                             } elseif (($last_segment = strrpos($base->ipath, '/')) !== false) {
       
   292                                 $target->ipath = substr($base->ipath, 0, $last_segment + 1) . $relative->ipath;
       
   293                             } else {
       
   294                                 $target->ipath = $relative->ipath;
       
   295                             }
       
   296                             $target->ipath = $target->remove_dot_segments($target->ipath);
       
   297                             $target->iquery = $relative->iquery;
       
   298                         } else {
       
   299                             $target->ipath = $base->ipath;
       
   300                             if ($relative->iquery !== null) {
       
   301                                 $target->iquery = $relative->iquery;
       
   302                             } elseif ($base->iquery !== null) {
       
   303                                 $target->iquery = $base->iquery;
       
   304                             }
       
   305                         }
       
   306                         $target->ifragment = $relative->ifragment;
       
   307                     }
       
   308                 } else {
       
   309                     $target = clone $base;
       
   310                     $target->ifragment = null;
       
   311                 }
       
   312                 $target->scheme_normalization();
       
   313                 return $target;
       
   314             }
       
   315 
       
   316             return false;
       
   317         }
       
   318     }
       
   319 
       
   320     /**
       
   321      * Parse an IRI into scheme/authority/path/query/fragment segments
       
   322      *
       
   323      * @param string $iri
       
   324      * @return array
       
   325      */
       
   326     protected function parse_iri($iri)
       
   327     {
       
   328         $iri = trim($iri, "\x20\x09\x0A\x0C\x0D");
       
   329         if (preg_match('/^((?P<scheme>[^:\/?#]+):)?(\/\/(?P<authority>[^\/?#]*))?(?P<path>[^?#]*)(\?(?P<query>[^#]*))?(#(?P<fragment>.*))?$/', $iri, $match)) {
       
   330             if ($match[1] === '') {
       
   331                 $match['scheme'] = null;
       
   332             }
       
   333             if (!isset($match[3]) || $match[3] === '') {
       
   334                 $match['authority'] = null;
       
   335             }
       
   336             if (!isset($match[5])) {
       
   337                 $match['path'] = '';
       
   338             }
       
   339             if (!isset($match[6]) || $match[6] === '') {
       
   340                 $match['query'] = null;
       
   341             }
       
   342             if (!isset($match[8]) || $match[8] === '') {
       
   343                 $match['fragment'] = null;
       
   344             }
       
   345             return $match;
       
   346         }
       
   347 
       
   348         // This can occur when a paragraph is accidentally parsed as a URI
       
   349         return false;
       
   350     }
       
   351 
       
   352     /**
       
   353      * Remove dot segments from a path
       
   354      *
       
   355      * @param string $input
       
   356      * @return string
       
   357      */
       
   358     protected function remove_dot_segments($input)
       
   359     {
       
   360         $output = '';
       
   361         while (strpos($input, './') !== false || strpos($input, '/.') !== false || $input === '.' || $input === '..') {
       
   362             // A: If the input buffer begins with a prefix of "../" or "./", then remove that prefix from the input buffer; otherwise,
       
   363             if (strpos($input, '../') === 0) {
       
   364                 $input = substr($input, 3);
       
   365             } elseif (strpos($input, './') === 0) {
       
   366                 $input = substr($input, 2);
       
   367             }
       
   368             // B: if the input buffer begins with a prefix of "/./" or "/.", where "." is a complete path segment, then replace that prefix with "/" in the input buffer; otherwise,
       
   369             elseif (strpos($input, '/./') === 0) {
       
   370                 $input = substr($input, 2);
       
   371             } elseif ($input === '/.') {
       
   372                 $input = '/';
       
   373             }
       
   374             // C: if the input buffer begins with a prefix of "/../" or "/..", where ".." is a complete path segment, then replace that prefix with "/" in the input buffer and remove the last segment and its preceding "/" (if any) from the output buffer; otherwise,
       
   375             elseif (strpos($input, '/../') === 0) {
       
   376                 $input = substr($input, 3);
       
   377                 $output = substr_replace($output, '', intval(strrpos($output, '/')));
       
   378             } elseif ($input === '/..') {
       
   379                 $input = '/';
       
   380                 $output = substr_replace($output, '', intval(strrpos($output, '/')));
       
   381             }
       
   382             // D: if the input buffer consists only of "." or "..", then remove that from the input buffer; otherwise,
       
   383             elseif ($input === '.' || $input === '..') {
       
   384                 $input = '';
       
   385             }
       
   386             // E: move the first path segment in the input buffer to the end of the output buffer, including the initial "/" character (if any) and any subsequent characters up to, but not including, the next "/" character or the end of the input buffer
       
   387             elseif (($pos = strpos($input, '/', 1)) !== false) {
       
   388                 $output .= substr($input, 0, $pos);
       
   389                 $input = substr_replace($input, '', 0, $pos);
       
   390             } else {
       
   391                 $output .= $input;
       
   392                 $input = '';
       
   393             }
       
   394         }
       
   395         return $output . $input;
       
   396     }
       
   397 
       
   398     /**
       
   399      * Replace invalid character with percent encoding
       
   400      *
       
   401      * @param string $string Input string
       
   402      * @param string $extra_chars Valid characters not in iunreserved or
       
   403      *                            iprivate (this is ASCII-only)
       
   404      * @param bool $iprivate Allow iprivate
       
   405      * @return string
       
   406      */
       
   407     protected function replace_invalid_with_pct_encoding($string, $extra_chars, $iprivate = false)
       
   408     {
       
   409         // Normalize as many pct-encoded sections as possible
       
   410         $string = preg_replace_callback('/(?:%[A-Fa-f0-9]{2})+/', [$this, 'remove_iunreserved_percent_encoded'], $string);
       
   411 
       
   412         // Replace invalid percent characters
       
   413         $string = preg_replace('/%(?![A-Fa-f0-9]{2})/', '%25', $string);
       
   414 
       
   415         // Add unreserved and % to $extra_chars (the latter is safe because all
       
   416         // pct-encoded sections are now valid).
       
   417         $extra_chars .= 'ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789-._~%';
       
   418 
       
   419         // Now replace any bytes that aren't allowed with their pct-encoded versions
       
   420         $position = 0;
       
   421         $strlen = strlen($string);
       
   422         while (($position += strspn($string, $extra_chars, $position)) < $strlen) {
       
   423             $value = ord($string[$position]);
       
   424             $character = 0;
       
   425 
       
   426             // Start position
       
   427             $start = $position;
       
   428 
       
   429             // By default we are valid
       
   430             $valid = true;
       
   431 
       
   432             // No one byte sequences are valid due to the while.
       
   433             // Two byte sequence:
       
   434             if (($value & 0xE0) === 0xC0) {
       
   435                 $character = ($value & 0x1F) << 6;
       
   436                 $length = 2;
       
   437                 $remaining = 1;
       
   438             }
       
   439             // Three byte sequence:
       
   440             elseif (($value & 0xF0) === 0xE0) {
       
   441                 $character = ($value & 0x0F) << 12;
       
   442                 $length = 3;
       
   443                 $remaining = 2;
       
   444             }
       
   445             // Four byte sequence:
       
   446             elseif (($value & 0xF8) === 0xF0) {
       
   447                 $character = ($value & 0x07) << 18;
       
   448                 $length = 4;
       
   449                 $remaining = 3;
       
   450             }
       
   451             // Invalid byte:
       
   452             else {
       
   453                 $valid = false;
       
   454                 $length = 1;
       
   455                 $remaining = 0;
       
   456             }
       
   457 
       
   458             if ($remaining) {
       
   459                 if ($position + $length <= $strlen) {
       
   460                     for ($position++; $remaining; $position++) {
       
   461                         $value = ord($string[$position]);
       
   462 
       
   463                         // Check that the byte is valid, then add it to the character:
       
   464                         if (($value & 0xC0) === 0x80) {
       
   465                             $character |= ($value & 0x3F) << (--$remaining * 6);
       
   466                         }
       
   467                         // If it is invalid, count the sequence as invalid and reprocess the current byte:
       
   468                         else {
       
   469                             $valid = false;
       
   470                             $position--;
       
   471                             break;
       
   472                         }
       
   473                     }
       
   474                 } else {
       
   475                     $position = $strlen - 1;
       
   476                     $valid = false;
       
   477                 }
       
   478             }
       
   479 
       
   480             // Percent encode anything invalid or not in ucschar
       
   481             if (
       
   482                 // Invalid sequences
       
   483                 !$valid
       
   484                 // Non-shortest form sequences are invalid
       
   485                 || $length > 1 && $character <= 0x7F
       
   486                 || $length > 2 && $character <= 0x7FF
       
   487                 || $length > 3 && $character <= 0xFFFF
       
   488                 // Outside of range of ucschar codepoints
       
   489                 // Noncharacters
       
   490                 || ($character & 0xFFFE) === 0xFFFE
       
   491                 || $character >= 0xFDD0 && $character <= 0xFDEF
       
   492                 || (
       
   493                     // Everything else not in ucschar
       
   494                     $character > 0xD7FF && $character < 0xF900
       
   495                     || $character < 0xA0
       
   496                     || $character > 0xEFFFD
       
   497                 )
       
   498                 && (
       
   499                     // Everything not in iprivate, if it applies
       
   500                     !$iprivate
       
   501                     || $character < 0xE000
       
   502                     || $character > 0x10FFFD
       
   503                 )
       
   504             ) {
       
   505                 // If we were a character, pretend we weren't, but rather an error.
       
   506                 if ($valid) {
       
   507                     $position--;
       
   508                 }
       
   509 
       
   510                 for ($j = $start; $j <= $position; $j++) {
       
   511                     $string = substr_replace($string, sprintf('%%%02X', ord($string[$j])), $j, 1);
       
   512                     $j += 2;
       
   513                     $position += 2;
       
   514                     $strlen += 2;
       
   515                 }
       
   516             }
       
   517         }
       
   518 
       
   519         return $string;
       
   520     }
       
   521 
       
   522     /**
       
   523      * Callback function for preg_replace_callback.
       
   524      *
       
   525      * Removes sequences of percent encoded bytes that represent UTF-8
       
   526      * encoded characters in iunreserved
       
   527      *
       
   528      * @param array $match PCRE match
       
   529      * @return string Replacement
       
   530      */
       
   531     protected function remove_iunreserved_percent_encoded($match)
       
   532     {
       
   533         // As we just have valid percent encoded sequences we can just explode
       
   534         // and ignore the first member of the returned array (an empty string).
       
   535         $bytes = explode('%', $match[0]);
       
   536 
       
   537         // Initialize the new string (this is what will be returned) and that
       
   538         // there are no bytes remaining in the current sequence (unsurprising
       
   539         // at the first byte!).
       
   540         $string = '';
       
   541         $remaining = 0;
       
   542 
       
   543         // these variables will be initialized in the loop but PHPStan is not able to detect it currently
       
   544         $start = 0;
       
   545         $character = 0;
       
   546         $length = 0;
       
   547         $valid = true;
       
   548 
       
   549         // Loop over each and every byte, and set $value to its value
       
   550         for ($i = 1, $len = count($bytes); $i < $len; $i++) {
       
   551             $value = hexdec($bytes[$i]);
       
   552 
       
   553             // If we're the first byte of sequence:
       
   554             if (!$remaining) {
       
   555                 // Start position
       
   556                 $start = $i;
       
   557 
       
   558                 // By default we are valid
       
   559                 $valid = true;
       
   560 
       
   561                 // One byte sequence:
       
   562                 if ($value <= 0x7F) {
       
   563                     $character = $value;
       
   564                     $length = 1;
       
   565                 }
       
   566                 // Two byte sequence:
       
   567                 elseif (($value & 0xE0) === 0xC0) {
       
   568                     $character = ($value & 0x1F) << 6;
       
   569                     $length = 2;
       
   570                     $remaining = 1;
       
   571                 }
       
   572                 // Three byte sequence:
       
   573                 elseif (($value & 0xF0) === 0xE0) {
       
   574                     $character = ($value & 0x0F) << 12;
       
   575                     $length = 3;
       
   576                     $remaining = 2;
       
   577                 }
       
   578                 // Four byte sequence:
       
   579                 elseif (($value & 0xF8) === 0xF0) {
       
   580                     $character = ($value & 0x07) << 18;
       
   581                     $length = 4;
       
   582                     $remaining = 3;
       
   583                 }
       
   584                 // Invalid byte:
       
   585                 else {
       
   586                     $valid = false;
       
   587                     $remaining = 0;
       
   588                 }
       
   589             }
       
   590             // Continuation byte:
       
   591             else {
       
   592                 // Check that the byte is valid, then add it to the character:
       
   593                 if (($value & 0xC0) === 0x80) {
       
   594                     $remaining--;
       
   595                     $character |= ($value & 0x3F) << ($remaining * 6);
       
   596                 }
       
   597                 // If it is invalid, count the sequence as invalid and reprocess the current byte as the start of a sequence:
       
   598                 else {
       
   599                     $valid = false;
       
   600                     $remaining = 0;
       
   601                     $i--;
       
   602                 }
       
   603             }
       
   604 
       
   605             // If we've reached the end of the current byte sequence, append it to Unicode::$data
       
   606             if (!$remaining) {
       
   607                 // Percent encode anything invalid or not in iunreserved
       
   608                 if (
       
   609                     // Invalid sequences
       
   610                     !$valid
       
   611                     // Non-shortest form sequences are invalid
       
   612                     || $length > 1 && $character <= 0x7F
       
   613                     || $length > 2 && $character <= 0x7FF
       
   614                     || $length > 3 && $character <= 0xFFFF
       
   615                     // Outside of range of iunreserved codepoints
       
   616                     || $character < 0x2D
       
   617                     || $character > 0xEFFFD
       
   618                     // Noncharacters
       
   619                     || ($character & 0xFFFE) === 0xFFFE
       
   620                     || $character >= 0xFDD0 && $character <= 0xFDEF
       
   621                     // Everything else not in iunreserved (this is all BMP)
       
   622                     || $character === 0x2F
       
   623                     || $character > 0x39 && $character < 0x41
       
   624                     || $character > 0x5A && $character < 0x61
       
   625                     || $character > 0x7A && $character < 0x7E
       
   626                     || $character > 0x7E && $character < 0xA0
       
   627                     || $character > 0xD7FF && $character < 0xF900
       
   628                 ) {
       
   629                     for ($j = $start; $j <= $i; $j++) {
       
   630                         $string .= '%' . strtoupper($bytes[$j]);
       
   631                     }
       
   632                 } else {
       
   633                     for ($j = $start; $j <= $i; $j++) {
       
   634                         $string .= chr(hexdec($bytes[$j]));
       
   635                     }
       
   636                 }
       
   637             }
       
   638         }
       
   639 
       
   640         // If we have any bytes left over they are invalid (i.e., we are
       
   641         // mid-way through a multi-byte sequence)
       
   642         if ($remaining) {
       
   643             for ($j = $start; $j < $len; $j++) {
       
   644                 $string .= '%' . strtoupper($bytes[$j]);
       
   645             }
       
   646         }
       
   647 
       
   648         return $string;
       
   649     }
       
   650 
       
   /**
    * Reset every component that equals the default registered for the current
    * scheme in $this->normalization.
    *
    * A matching component is reset to null, except the path, which is reset
    * to the empty string.
    *
    * @return void
    */
   protected function scheme_normalization()
   {
       $components = ['iuserinfo', 'ihost', 'port', 'ipath', 'iquery', 'ifragment'];

       foreach ($components as $component) {
           // No default registered for this scheme/component pair.
           if (!isset($this->normalization[$this->scheme][$component])) {
               continue;
           }

           if ($this->$component !== $this->normalization[$this->scheme][$component]) {
               continue;
           }

           // The path is the only component whose "unset" value is '' rather than null.
           $this->$component = ($component === 'ipath') ? '' : null;
       }
   }
       
   672 
       
   /**
    * Tell whether the components currently stored on this object combine
    * into a valid IRI.
    *
    * The answer is recomputed on every call because the checks on the path
    * depend on whether a scheme and an authority are present.
    *
    * @return bool
    */
   public function is_valid()
   {
       $path = $this->ipath;

       // An empty path is accepted whatever the other components are.
       if ($path === '') {
           return true;
       }

       $has_authority = !(
           $this->iuserinfo === null
           && $this->ihost === null
           && $this->port === null
       );

       if ($has_authority && $path[0] === '/') {
           return true;
       }

       // Without an authority, a leading "//" would be read back as one.
       if (!$has_authority && substr($path, 0, 2) === '//') {
           return false;
       }

       // The remaining rule only concerns scheme-less, authority-less references.
       if ($this->scheme || $has_authority) {
           return true;
       }

       // Relative references cannot have a colon in the first path segment
       // (the search for "/" skips the first character of the path).
       $colon = strpos($path, ':');
       $slash = strpos($path, '/', 1);

       return $colon === false || $slash === false || $colon >= $slash;
   }
       
   706 
       
   /**
    * Set the entire IRI. Returns true on success, false on failure (if there
    * are any invalid characters).
    *
    * The resulting component values and the return value are memoised in a
    * function-static cache keyed by the IRI string, so a repeated IRI is
    * restored from the cache instead of being parsed again.
    *
    * @param string|null $iri IRI to set; null leaves the object untouched.
    * @param bool $clear_cache When true, only empty the cache; $iri is ignored.
    * @return bool|null Null when called only to clear the cache, otherwise
    *                   whether every component was accepted.
    */
   public function set_iri($iri, $clear_cache = false)
   {
       static $cache;
       if ($clear_cache) {
           $cache = null;
           return;
       }
       if (!$cache) {
           $cache = [];
       }

       if ($iri === null) {
           return true;
       }

       // Convert to a string once, before touching the cache. Using the raw
       // value as an array key fails for stringable objects ("Illegal offset
       // type") and makes floats/booleans collide with unrelated integer keys.
       $iri = (string) $iri;

       if (isset($cache[$iri])) {
           [
               $this->scheme,
               $this->iuserinfo,
               $this->ihost,
               $this->port,
               $this->ipath,
               $this->iquery,
               $this->ifragment,
               $return
           ] = $cache[$iri];

           return $return;
       }

       $parsed = $this->parse_iri($iri);
       if (!$parsed) {
           return false;
       }

       // Short-circuits on the first component that is rejected.
       $return = $this->set_scheme($parsed['scheme'])
           && $this->set_authority($parsed['authority'])
           && $this->set_path($parsed['path'])
           && $this->set_query($parsed['query'])
           && $this->set_fragment($parsed['fragment']);

       $cache[$iri] = [
           $this->scheme,
           $this->iuserinfo,
           $this->ihost,
           $this->port,
           $this->ipath,
           $this->iquery,
           $this->ifragment,
           $return
       ];

       return $return;
   }
       
   766 
       
   /**
    * Set the scheme. Returns true on success, false on failure (if there are
    * any invalid characters).
    *
    * A valid scheme starts with an ASCII letter followed by any number of
    * ASCII letters, digits, "+", "-" or "."; it is stored lowercased. An
    * invalid scheme resets the stored scheme to null.
    *
    * @param string|null $scheme Scheme to set, or null to remove it.
    * @return bool
    */
   public function set_scheme($scheme)
   {
       if ($scheme === null) {
           $this->scheme = null;
           return true;
       }

       // The D modifier makes "$" match only at the very end of the subject.
       // Without it a trailing newline ("http\n") passes validation and ends
       // up stored in the scheme.
       if (!preg_match('/^[A-Za-z][0-9A-Za-z+\-.]*$/D', $scheme)) {
           $this->scheme = null;
           return false;
       }

       $this->scheme = strtolower($scheme);
       return true;
   }
       
   786 
       
   /**
    * Set the authority. Returns true on success, false on failure (if there are
    * any invalid characters).
    *
    * The authority is split into userinfo (everything before the last "@"),
    * host, and port (everything after the first ":" that follows any "]").
    * Each part is handed to its own setter. The resulting component values
    * and the return value are memoised in a function-static cache keyed by
    * the authority string.
    *
    * @param string|null $authority Authority to set, or null to clear
    *                               userinfo, host and port.
    * @param bool $clear_cache When true, only empty the cache.
    * @return bool|null Null when called only to clear the cache.
    */
   public function set_authority($authority, $clear_cache = false)
   {
       static $cache;
       if ($clear_cache) {
           $cache = null;
           return;
       }
       if (!$cache) {
           $cache = [];
       }

       if ($authority === null) {
           $this->iuserinfo = null;
           $this->ihost = null;
           $this->port = null;
           return true;
       }

       if (isset($cache[$authority])) {
           [$this->iuserinfo, $this->ihost, $this->port, $cached_result] = $cache[$authority];

           return $cached_result;
       }

       $host_and_port = $authority;

       // Userinfo ends at the last "@".
       $userinfo = null;
       $at = strrpos($host_and_port, '@');
       if ($at !== false) {
           $userinfo = substr($host_and_port, 0, $at);
           $host_and_port = substr($host_and_port, $at + 1);
       }

       // Start looking for the port separator at the "]" (if any) so that
       // colons inside a bracketed host are not mistaken for it.
       $port = null;
       $search_from = intval(strpos($host_and_port, ']'));
       $colon = strpos($host_and_port, ':', $search_from);
       if ($colon !== false) {
           $port = substr($host_and_port, $colon + 1);
           if ($port === false) {
               $port = null;
           }
           $host_and_port = substr($host_and_port, 0, $colon);
       }

       $result = $this->set_userinfo($userinfo)
           && $this->set_host($host_and_port)
           && $this->set_port($port);

       $cache[$authority] = [$this->iuserinfo, $this->ihost, $this->port, $result];

       return $result;
   }
       
   850 
       
   /**
    * Set the iuserinfo.
    *
    * A non-null value is passed through replace_invalid_with_pct_encoding()
    * before being stored, then scheme normalization is applied.
    *
    * @param string|null $iuserinfo Userinfo to set, or null to remove it.
    * @return bool Always true.
    */
   public function set_userinfo($iuserinfo)
   {
       if ($iuserinfo === null) {
           $this->iuserinfo = null;

           return true;
       }

       $this->iuserinfo = $this->replace_invalid_with_pct_encoding($iuserinfo, '!$&\'()*+,;=:');
       $this->scheme_normalization();

       return true;
   }
       
   868 
       
   /**
    * Set the ihost. Returns true on success, false on failure (if there are
    * any invalid characters).
    *
    * A host wrapped in "[" and "]" must pass the IPv6 check and is stored in
    * compressed form; if the check fails the host is reset to null. Any other
    * host is percent-encoded where needed and lowercased.
    *
    * @param string|null $ihost Host to set, or null to remove it.
    * @return bool
    */
   public function set_host($ihost)
   {
       if ($ihost === null) {
           $this->ihost = null;

           return true;
       }

       $is_bracketed = substr($ihost, 0, 1) === '[' && substr($ihost, -1) === ']';

       if ($is_bracketed) {
           $address = substr($ihost, 1, -1);
           if (!\SimplePie\Net\IPv6::check_ipv6($address)) {
               $this->ihost = null;

               return false;
           }

           $this->ihost = '[' . \SimplePie\Net\IPv6::compress($address) . ']';
       } else {
           $host = $this->replace_invalid_with_pct_encoding($ihost, '!$&\'()*+,;=');

           // Lowercase, but skip pct-encoded sections (as they should remain
           // uppercase). This has to happen after the encoding step above,
           // since that step can add unescaped characters.
           $length = strlen($host);
           $offset = strcspn($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%');
           while ($offset < $length) {
               if ($host[$offset] === '%') {
                   // Jump over the "%" and the two characters after it.
                   $offset += 3;
               } else {
                   $host[$offset] = strtolower($host[$offset]);
                   $offset++;
               }
               $offset += strcspn($host, 'ABCDEFGHIJKLMNOPQRSTUVWXYZ%', $offset);
           }

           $this->ihost = $host;
       }

       $this->scheme_normalization();

       return true;
   }
       
   912 
       
   /**
    * Set the port. Returns true on success, false on failure (if there are
    * any invalid characters).
    *
    * A value made up solely of the digits 0-9 is stored as an integer and
    * scheme normalization is applied; anything else resets the port to null.
    *
    * @param string|null $port Port to set, or null to remove it.
    * @return bool
    */
   public function set_port($port)
   {
       if ($port === null) {
           $this->port = null;

           return true;
       }

       $digits_only = strspn($port, '0123456789') === strlen($port);
       if (!$digits_only) {
           $this->port = null;

           return false;
       }

       $this->port = (int) $port;
       $this->scheme_normalization();

       return true;
   }
       
   934 
       
   /**
    * Set the ipath.
    *
    * Two forms of the path are computed and memoised in a function-static
    * cache keyed by the input string: the percent-encoded path, and that same
    * path after remove_dot_segments(). The dot-segment-free form is stored
    * when a scheme is set, the plain encoded form otherwise.
    *
    * @param string $ipath Path to set; it is cast to a string.
    * @param bool $clear_cache When true, only empty the cache.
    * @return bool|null Null when called only to clear the cache, otherwise true.
    */
   public function set_path($ipath, $clear_cache = false)
   {
       static $cache;
       if ($clear_cache) {
           $cache = null;
           return;
       }
       if (!$cache) {
           $cache = [];
       }

       $ipath = (string) $ipath;

       if (!isset($cache[$ipath])) {
           $encoded = $this->replace_invalid_with_pct_encoding($ipath, '!$&\'()*+,;=@:/');
           $cache[$ipath] = [$encoded, $this->remove_dot_segments($encoded)];
       }

       [$encoded_path, $dotless_path] = $cache[$ipath];
       $this->ipath = ($this->scheme !== null) ? $dotless_path : $encoded_path;

       $this->scheme_normalization();

       return true;
   }
       
   967 
       
   /**
    * Set the iquery.
    *
    * A non-null value is passed through replace_invalid_with_pct_encoding()
    * before being stored, then scheme normalization is applied.
    *
    * @param string|null $iquery Query to set, or null to remove it.
    * @return bool Always true.
    */
   public function set_query($iquery)
   {
       if ($iquery === null) {
           $this->iquery = null;

           return true;
       }

       // NOTE(review): the third argument (true) is only passed for queries;
       // its meaning is defined by replace_invalid_with_pct_encoding() - confirm there.
       $this->iquery = $this->replace_invalid_with_pct_encoding($iquery, '!$&\'()*+,;=:@/?', true);
       $this->scheme_normalization();

       return true;
   }
       
   984 
       
   /**
    * Set the ifragment.
    *
    * A non-null value is passed through replace_invalid_with_pct_encoding()
    * before being stored, then scheme normalization is applied.
    *
    * @param string|null $ifragment Fragment to set, or null to remove it.
    * @return bool Always true.
    */
   public function set_fragment($ifragment)
   {
       if ($ifragment === null) {
           $this->ifragment = null;

           return true;
       }

       $this->ifragment = $this->replace_invalid_with_pct_encoding($ifragment, '!$&\'()*+,;=:@/?');
       $this->scheme_normalization();

       return true;
   }
       
  1001 
       
  /**
   * Convert an IRI to a URI (or parts thereof)
   *
   * Every byte in the range 0x80-0xFF is replaced by its "%XX" escape
   * (uppercase hex); all other bytes are left as they are.
   *
   * @param string $string IRI, or part of one, to convert.
   * @return string
   */
  public function to_uri($string)
  {
      // Built once and reused: a string holding every byte from 0x80 to 0xFF.
      static $non_ascii;
      if (!$non_ascii) {
          $non_ascii = implode('', range("\x80", "\xFF"));
      }

      $length = strlen($string);
      $offset = strcspn($string, $non_ascii);

      while ($offset < $length) {
          $escaped = sprintf('%%%02X', ord($string[$offset]));
          $string = substr_replace($string, $escaped, $offset, 1);

          // One byte became three: step past the escape and grow the bound.
          $offset += 3;
          $length += 2;

          $offset += strcspn($string, $non_ascii, $offset);
      }

      return $string;
  }
       
  1024 
       
  /**
   * Get the complete IRI
   *
   * The stored components are joined as scheme ":" "//" authority path
   * "?" query "#" fragment, leaving out the parts that are not set. When the
   * path is empty but a non-empty authority exists, the scheme's default
   * path from $this->normalization (if any) is used instead.
   *
   * @return string|false The IRI, or false when is_valid() reports the
   *                      current components as invalid.
   */
  public function get_iri()
  {
      if (!$this->is_valid()) {
          return false;
      }

      $iauthority = $this->get_iauthority();

      $iri = ($this->scheme !== null) ? $this->scheme . ':' : '';

      if ($iauthority !== null) {
          $iri .= '//' . $iauthority;
      }

      if ($this->ipath !== '') {
          $iri .= $this->ipath;
      } elseif (
          $iauthority !== null
          && $iauthority !== ''
          && !empty($this->normalization[$this->scheme]['ipath'])
      ) {
          $iri .= $this->normalization[$this->scheme]['ipath'];
      }

      if ($this->iquery !== null) {
          $iri .= '?' . $this->iquery;
      }

      if ($this->ifragment !== null) {
          $iri .= '#' . $this->ifragment;
      }

      return $iri;
  }
       
  1057 
       
  /**
   * Get the complete URI
   *
   * This is the result of get_iri() passed through to_uri().
   *
   * @return string|false The URI, or false when get_iri() returns false
   *                      because the current components are not valid.
   */
  public function get_uri()
  {
      $iri = $this->get_iri();

      // Return the failure explicitly instead of pushing false through the
      // string functions in to_uri() and relying on implicit coercion.
      if ($iri === false) {
          return false;
      }

      return $this->to_uri($iri);
  }
       
  1067 
       
  /**
   * Get the complete iauthority
   *
   * Built as userinfo "@" host ":" port from the parts that are set; a port
   * of 0 is left out.
   *
   * @return string|null Null when userinfo, host and port are all null.
   */
  protected function get_iauthority()
  {
      if ($this->iuserinfo === null && $this->ihost === null && $this->port === null) {
          return null;
      }

      $userinfo = ($this->iuserinfo !== null) ? $this->iuserinfo . '@' : '';
      $host = ($this->ihost !== null) ? $this->ihost : '';
      $port = ($this->port !== null && $this->port !== 0) ? ':' . $this->port : '';

      return $userinfo . $host . $port;
  }
       
  1091 
       
  /**
   * Get the complete authority
   *
   * This is get_iauthority() converted with to_uri(). A non-string result
   * from get_iauthority() is returned as it is.
   *
   * @return string|null
   */
  protected function get_authority()
  {
      $iauthority = $this->get_iauthority();

      return is_string($iauthority) ? $this->to_uri($iauthority) : $iauthority;
  }
       
  1106 }
       
  1107 
       
  // Make the class also reachable under the un-namespaced name SimplePie_IRI
  // (presumably kept for code written against the pre-namespace class name).
  class_alias('SimplePie\IRI', 'SimplePie_IRI');