wp/wp-includes/SimplePie/src/HTTP/Parser.php
changeset 22 8c2e4d02f4ef
equal deleted inserted replaced
21:48c4eec2b7e6 22:8c2e4d02f4ef
       
     1 <?php
       
     2 
       
     3 /**
       
     4  * SimplePie
       
     5  *
       
     6  * A PHP-Based RSS and Atom Feed Framework.
       
     7  * Takes the hard work out of managing a complete RSS/Atom solution.
       
     8  *
       
     9  * Copyright (c) 2004-2022, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
       
    10  * All rights reserved.
       
    11  *
       
    12  * Redistribution and use in source and binary forms, with or without modification, are
       
    13  * permitted provided that the following conditions are met:
       
    14  *
       
    15  * 	* Redistributions of source code must retain the above copyright notice, this list of
       
    16  * 	  conditions and the following disclaimer.
       
    17  *
       
    18  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
       
    19  * 	  of conditions and the following disclaimer in the documentation and/or other materials
       
    20  * 	  provided with the distribution.
       
    21  *
       
    22  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
       
    23  * 	  to endorse or promote products derived from this software without specific prior
       
    24  * 	  written permission.
       
    25  *
       
    26  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
       
    27  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    28  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
       
    29  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    30  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    31  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    32  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
       
    33  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    34  * POSSIBILITY OF SUCH DAMAGE.
       
    35  *
       
    36  * @package SimplePie
       
    37  * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
       
    38  * @author Ryan Parman
       
    39  * @author Sam Sneddon
       
    40  * @author Ryan McCue
       
    41  * @link http://simplepie.org/ SimplePie
       
    42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
       
    43  */
       
    44 
       
    45 namespace SimplePie\HTTP;
       
    46 
       
    47 /**
       
    48  * HTTP Response Parser
       
    49  *
       
    50  * @package SimplePie
       
    51  * @subpackage HTTP
       
    52  */
       
    53 class Parser
       
    54 {
       
    55     /**
       
    56      * HTTP Version
       
    57      *
       
    58      * @var float
       
    59      */
       
    60     public $http_version = 0.0;
       
    61 
       
    62     /**
       
    63      * Status code
       
    64      *
       
    65      * @var int
       
    66      */
       
    67     public $status_code = 0;
       
    68 
       
    69     /**
       
    70      * Reason phrase
       
    71      *
       
    72      * @var string
       
    73      */
       
    74     public $reason = '';
       
    75 
       
    76     /**
       
    77      * Key/value pairs of the headers
       
    78      *
       
    79      * @var array
       
    80      */
       
    81     public $headers = [];
       
    82 
       
    83     /**
       
    84      * Body of the response
       
    85      *
       
    86      * @var string
       
    87      */
       
    88     public $body = '';
       
    89 
       
    90     private const STATE_HTTP_VERSION = 'http_version';
       
    91 
       
    92     private const STATE_STATUS = 'status';
       
    93 
       
    94     private const STATE_REASON = 'reason';
       
    95 
       
    96     private const STATE_NEW_LINE = 'new_line';
       
    97 
       
    98     private const STATE_BODY = 'body';
       
    99 
       
   100     private const STATE_NAME = 'name';
       
   101 
       
   102     private const STATE_VALUE = 'value';
       
   103 
       
   104     private const STATE_VALUE_CHAR = 'value_char';
       
   105 
       
   106     private const STATE_QUOTE = 'quote';
       
   107 
       
   108     private const STATE_QUOTE_ESCAPED = 'quote_escaped';
       
   109 
       
   110     private const STATE_QUOTE_CHAR = 'quote_char';
       
   111 
       
   112     private const STATE_CHUNKED = 'chunked';
       
   113 
       
   114     private const STATE_EMIT = 'emit';
       
   115 
       
   116     private const STATE_ERROR = false;
       
   117 
       
   118     /**
       
   119      * Current state of the state machine
       
   120      *
       
   121      * @var self::STATE_*
       
   122      */
       
   123     protected $state = self::STATE_HTTP_VERSION;
       
   124 
       
   125     /**
       
   126      * Input data
       
   127      *
       
   128      * @var string
       
   129      */
       
   130     protected $data = '';
       
   131 
       
   132     /**
       
   133      * Input data length (to avoid calling strlen() everytime this is needed)
       
   134      *
       
   135      * @var int
       
   136      */
       
   137     protected $data_length = 0;
       
   138 
       
   139     /**
       
   140      * Current position of the pointer
       
   141      *
       
   142      * @var int
       
   143      */
       
   144     protected $position = 0;
       
   145 
       
   146     /**
       
   147      * Name of the hedaer currently being parsed
       
   148      *
       
   149      * @var string
       
   150      */
       
   151     protected $name = '';
       
   152 
       
   153     /**
       
   154      * Value of the hedaer currently being parsed
       
   155      *
       
   156      * @var string
       
   157      */
       
   158     protected $value = '';
       
   159 
       
   160     /**
       
   161      * Create an instance of the class with the input data
       
   162      *
       
   163      * @param string $data Input data
       
   164      */
       
   165     public function __construct($data)
       
   166     {
       
   167         $this->data = $data;
       
   168         $this->data_length = strlen($this->data);
       
   169     }
       
   170 
       
   171     /**
       
   172      * Parse the input data
       
   173      *
       
   174      * @return bool true on success, false on failure
       
   175      */
       
   176     public function parse()
       
   177     {
       
   178         while ($this->state && $this->state !== self::STATE_EMIT && $this->has_data()) {
       
   179             $state = $this->state;
       
   180             $this->$state();
       
   181         }
       
   182         $this->data = '';
       
   183         if ($this->state === self::STATE_EMIT || $this->state === self::STATE_BODY) {
       
   184             return true;
       
   185         }
       
   186 
       
   187         $this->http_version = '';
       
   188         $this->status_code = 0;
       
   189         $this->reason = '';
       
   190         $this->headers = [];
       
   191         $this->body = '';
       
   192         return false;
       
   193     }
       
   194 
       
   195     /**
       
   196      * Check whether there is data beyond the pointer
       
   197      *
       
   198      * @return bool true if there is further data, false if not
       
   199      */
       
   200     protected function has_data()
       
   201     {
       
   202         return (bool) ($this->position < $this->data_length);
       
   203     }
       
   204 
       
   205     /**
       
   206      * See if the next character is LWS
       
   207      *
       
   208      * @return bool true if the next character is LWS, false if not
       
   209      */
       
   210     protected function is_linear_whitespace()
       
   211     {
       
   212         return (bool) ($this->data[$this->position] === "\x09"
       
   213             || $this->data[$this->position] === "\x20"
       
   214             || ($this->data[$this->position] === "\x0A"
       
   215                 && isset($this->data[$this->position + 1])
       
   216                 && ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
       
   217     }
       
   218 
       
   219     /**
       
   220      * Parse the HTTP version
       
   221      */
       
   222     protected function http_version()
       
   223     {
       
   224         if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/') {
       
   225             $len = strspn($this->data, '0123456789.', 5);
       
   226             $this->http_version = substr($this->data, 5, $len);
       
   227             $this->position += 5 + $len;
       
   228             if (substr_count($this->http_version, '.') <= 1) {
       
   229                 $this->http_version = (float) $this->http_version;
       
   230                 $this->position += strspn($this->data, "\x09\x20", $this->position);
       
   231                 $this->state = self::STATE_STATUS;
       
   232             } else {
       
   233                 $this->state = self::STATE_ERROR;
       
   234             }
       
   235         } else {
       
   236             $this->state = self::STATE_ERROR;
       
   237         }
       
   238     }
       
   239 
       
   240     /**
       
   241      * Parse the status code
       
   242      */
       
   243     protected function status()
       
   244     {
       
   245         if ($len = strspn($this->data, '0123456789', $this->position)) {
       
   246             $this->status_code = (int) substr($this->data, $this->position, $len);
       
   247             $this->position += $len;
       
   248             $this->state = self::STATE_REASON;
       
   249         } else {
       
   250             $this->state = self::STATE_ERROR;
       
   251         }
       
   252     }
       
   253 
       
   254     /**
       
   255      * Parse the reason phrase
       
   256      */
       
   257     protected function reason()
       
   258     {
       
   259         $len = strcspn($this->data, "\x0A", $this->position);
       
   260         $this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
       
   261         $this->position += $len + 1;
       
   262         $this->state = self::STATE_NEW_LINE;
       
   263     }
       
   264 
       
   265     /**
       
   266      * Deal with a new line, shifting data around as needed
       
   267      */
       
   268     protected function new_line()
       
   269     {
       
   270         $this->value = trim($this->value, "\x0D\x20");
       
   271         if ($this->name !== '' && $this->value !== '') {
       
   272             $this->name = strtolower($this->name);
       
   273             // We should only use the last Content-Type header. c.f. issue #1
       
   274             if (isset($this->headers[$this->name]) && $this->name !== 'content-type') {
       
   275                 $this->headers[$this->name] .= ', ' . $this->value;
       
   276             } else {
       
   277                 $this->headers[$this->name] = $this->value;
       
   278             }
       
   279         }
       
   280         $this->name = '';
       
   281         $this->value = '';
       
   282         if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A") {
       
   283             $this->position += 2;
       
   284             $this->state = self::STATE_BODY;
       
   285         } elseif ($this->data[$this->position] === "\x0A") {
       
   286             $this->position++;
       
   287             $this->state = self::STATE_BODY;
       
   288         } else {
       
   289             $this->state = self::STATE_NAME;
       
   290         }
       
   291     }
       
   292 
       
   293     /**
       
   294      * Parse a header name
       
   295      */
       
   296     protected function name()
       
   297     {
       
   298         $len = strcspn($this->data, "\x0A:", $this->position);
       
   299         if (isset($this->data[$this->position + $len])) {
       
   300             if ($this->data[$this->position + $len] === "\x0A") {
       
   301                 $this->position += $len;
       
   302                 $this->state = self::STATE_NEW_LINE;
       
   303             } else {
       
   304                 $this->name = substr($this->data, $this->position, $len);
       
   305                 $this->position += $len + 1;
       
   306                 $this->state = self::STATE_VALUE;
       
   307             }
       
   308         } else {
       
   309             $this->state = self::STATE_ERROR;
       
   310         }
       
   311     }
       
   312 
       
   313     /**
       
   314      * Parse LWS, replacing consecutive LWS characters with a single space
       
   315      */
       
   316     protected function linear_whitespace()
       
   317     {
       
   318         do {
       
   319             if (substr($this->data, $this->position, 2) === "\x0D\x0A") {
       
   320                 $this->position += 2;
       
   321             } elseif ($this->data[$this->position] === "\x0A") {
       
   322                 $this->position++;
       
   323             }
       
   324             $this->position += strspn($this->data, "\x09\x20", $this->position);
       
   325         } while ($this->has_data() && $this->is_linear_whitespace());
       
   326         $this->value .= "\x20";
       
   327     }
       
   328 
       
   329     /**
       
   330      * See what state to move to while within non-quoted header values
       
   331      */
       
   332     protected function value()
       
   333     {
       
   334         if ($this->is_linear_whitespace()) {
       
   335             $this->linear_whitespace();
       
   336         } else {
       
   337             switch ($this->data[$this->position]) {
       
   338                 case '"':
       
   339                     // Workaround for ETags: we have to include the quotes as
       
   340                     // part of the tag.
       
   341                     if (strtolower($this->name) === 'etag') {
       
   342                         $this->value .= '"';
       
   343                         $this->position++;
       
   344                         $this->state = self::STATE_VALUE_CHAR;
       
   345                         break;
       
   346                     }
       
   347                     $this->position++;
       
   348                     $this->state = self::STATE_QUOTE;
       
   349                     break;
       
   350 
       
   351                 case "\x0A":
       
   352                     $this->position++;
       
   353                     $this->state = self::STATE_NEW_LINE;
       
   354                     break;
       
   355 
       
   356                 default:
       
   357                     $this->state = self::STATE_VALUE_CHAR;
       
   358                     break;
       
   359             }
       
   360         }
       
   361     }
       
   362 
       
   363     /**
       
   364      * Parse a header value while outside quotes
       
   365      */
       
   366     protected function value_char()
       
   367     {
       
   368         $len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
       
   369         $this->value .= substr($this->data, $this->position, $len);
       
   370         $this->position += $len;
       
   371         $this->state = self::STATE_VALUE;
       
   372     }
       
   373 
       
   374     /**
       
   375      * See what state to move to while within quoted header values
       
   376      */
       
   377     protected function quote()
       
   378     {
       
   379         if ($this->is_linear_whitespace()) {
       
   380             $this->linear_whitespace();
       
   381         } else {
       
   382             switch ($this->data[$this->position]) {
       
   383                 case '"':
       
   384                     $this->position++;
       
   385                     $this->state = self::STATE_VALUE;
       
   386                     break;
       
   387 
       
   388                 case "\x0A":
       
   389                     $this->position++;
       
   390                     $this->state = self::STATE_NEW_LINE;
       
   391                     break;
       
   392 
       
   393                 case '\\':
       
   394                     $this->position++;
       
   395                     $this->state = self::STATE_QUOTE_ESCAPED;
       
   396                     break;
       
   397 
       
   398                 default:
       
   399                     $this->state = self::STATE_QUOTE_CHAR;
       
   400                     break;
       
   401             }
       
   402         }
       
   403     }
       
   404 
       
   405     /**
       
   406      * Parse a header value while within quotes
       
   407      */
       
   408     protected function quote_char()
       
   409     {
       
   410         $len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
       
   411         $this->value .= substr($this->data, $this->position, $len);
       
   412         $this->position += $len;
       
   413         $this->state = self::STATE_VALUE;
       
   414     }
       
   415 
       
   416     /**
       
   417      * Parse an escaped character within quotes
       
   418      */
       
   419     protected function quote_escaped()
       
   420     {
       
   421         $this->value .= $this->data[$this->position];
       
   422         $this->position++;
       
   423         $this->state = self::STATE_QUOTE;
       
   424     }
       
   425 
       
   426     /**
       
   427      * Parse the body
       
   428      */
       
   429     protected function body()
       
   430     {
       
   431         $this->body = substr($this->data, $this->position);
       
   432         if (!empty($this->headers['transfer-encoding'])) {
       
   433             unset($this->headers['transfer-encoding']);
       
   434             $this->state = self::STATE_CHUNKED;
       
   435         } else {
       
   436             $this->state = self::STATE_EMIT;
       
   437         }
       
   438     }
       
   439 
       
   440     /**
       
   441      * Parsed a "Transfer-Encoding: chunked" body
       
   442      */
       
   443     protected function chunked()
       
   444     {
       
   445         if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body))) {
       
   446             $this->state = self::STATE_EMIT;
       
   447             return;
       
   448         }
       
   449 
       
   450         $decoded = '';
       
   451         $encoded = $this->body;
       
   452 
       
   453         while (true) {
       
   454             $is_chunked = (bool) preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches);
       
   455             if (!$is_chunked) {
       
   456                 // Looks like it's not chunked after all
       
   457                 $this->state = self::STATE_EMIT;
       
   458                 return;
       
   459             }
       
   460 
       
   461             $length = hexdec(trim($matches[1]));
       
   462             if ($length === 0) {
       
   463                 // Ignore trailer headers
       
   464                 $this->state = self::STATE_EMIT;
       
   465                 $this->body = $decoded;
       
   466                 return;
       
   467             }
       
   468 
       
   469             $chunk_length = strlen($matches[0]);
       
   470             $decoded .= substr($encoded, $chunk_length, $length);
       
   471             $encoded = substr($encoded, $chunk_length + $length + 2);
       
   472 
       
   473             // BC for PHP < 8.0: substr() can return bool instead of string
       
   474             $encoded = ($encoded === false) ? '' : $encoded;
       
   475 
       
   476             if (trim($encoded) === '0' || empty($encoded)) {
       
   477                 $this->state = self::STATE_EMIT;
       
   478                 $this->body = $decoded;
       
   479                 return;
       
   480             }
       
   481         }
       
   482     }
       
   483 
       
   484     /**
       
   485      * Prepare headers (take care of proxies headers)
       
   486      *
       
   487      * @param string  $headers Raw headers
       
   488      * @param integer $count   Redirection count. Default to 1.
       
   489      *
       
   490      * @return string
       
   491      */
       
   492     public static function prepareHeaders($headers, $count = 1)
       
   493     {
       
   494         $data = explode("\r\n\r\n", $headers, $count);
       
   495         $data = array_pop($data);
       
   496         if (false !== stripos($data, "HTTP/1.0 200 Connection established\r\n")) {
       
   497             $exploded = explode("\r\n\r\n", $data, 2);
       
   498             $data = end($exploded);
       
   499         }
       
   500         if (false !== stripos($data, "HTTP/1.1 200 Connection established\r\n")) {
       
   501             $exploded = explode("\r\n\r\n", $data, 2);
       
   502             $data = end($exploded);
       
   503         }
       
   504         return $data;
       
   505     }
       
   506 }
       
   507 
       
    508 class_alias('SimplePie\HTTP\Parser', 'SimplePie_HTTP_Parser'); // Also registers the class under the non-namespaced name SimplePie_HTTP_Parser.