web/wp-includes/SimplePie/HTTP/Parser.php
changeset 204 09a1c134465b
equal deleted inserted replaced
203:f507feede89a 204:09a1c134465b
       
     1 <?php
       
     2 /**
       
     3  * SimplePie
       
     4  *
       
     5  * A PHP-Based RSS and Atom Feed Framework.
       
     6  * Takes the hard work out of managing a complete RSS/Atom solution.
       
     7  *
       
     8  * Copyright (c) 2004-2012, Ryan Parman, Geoffrey Sneddon, Ryan McCue, and contributors
       
     9  * All rights reserved.
       
    10  *
       
    11  * Redistribution and use in source and binary forms, with or without modification, are
       
    12  * permitted provided that the following conditions are met:
       
    13  *
       
    14  * 	* Redistributions of source code must retain the above copyright notice, this list of
       
    15  * 	  conditions and the following disclaimer.
       
    16  *
       
    17  * 	* Redistributions in binary form must reproduce the above copyright notice, this list
       
    18  * 	  of conditions and the following disclaimer in the documentation and/or other materials
       
    19  * 	  provided with the distribution.
       
    20  *
       
    21  * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
       
    22  * 	  to endorse or promote products derived from this software without specific prior
       
    23  * 	  written permission.
       
    24  *
       
    25  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
       
    26  * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
       
    27  * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
       
    28  * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
       
    29  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
       
    30  * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
       
    31  * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
       
    32  * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
       
    33  * POSSIBILITY OF SUCH DAMAGE.
       
    34  *
       
    35  * @package SimplePie
       
    36  * @version 1.3.1
       
    37  * @copyright 2004-2012 Ryan Parman, Geoffrey Sneddon, Ryan McCue
       
    38  * @author Ryan Parman
       
    39  * @author Geoffrey Sneddon
       
    40  * @author Ryan McCue
       
    41  * @link http://simplepie.org/ SimplePie
       
    42  * @license http://www.opensource.org/licenses/bsd-license.php BSD License
       
    43  */
       
    44 
       
    45 
       
    46 /**
       
    47  * HTTP Response Parser
       
    48  *
       
    49  * @package SimplePie
       
    50  * @subpackage HTTP
       
    51  */
       
    52 class SimplePie_HTTP_Parser
       
    53 {
       
    54 	/**
       
    55 	 * HTTP Version
       
    56 	 *
       
    57 	 * @var float
       
    58 	 */
       
    59 	public $http_version = 0.0;
       
    60 
       
    61 	/**
       
    62 	 * Status code
       
    63 	 *
       
    64 	 * @var int
       
    65 	 */
       
    66 	public $status_code = 0;
       
    67 
       
    68 	/**
       
    69 	 * Reason phrase
       
    70 	 *
       
    71 	 * @var string
       
    72 	 */
       
    73 	public $reason = '';
       
    74 
       
    75 	/**
       
    76 	 * Key/value pairs of the headers
       
    77 	 *
       
    78 	 * @var array
       
    79 	 */
       
    80 	public $headers = array();
       
    81 
       
    82 	/**
       
    83 	 * Body of the response
       
    84 	 *
       
    85 	 * @var string
       
    86 	 */
       
    87 	public $body = '';
       
    88 
       
    89 	/**
       
    90 	 * Current state of the state machine
       
    91 	 *
       
    92 	 * @var string
       
    93 	 */
       
    94 	protected $state = 'http_version';
       
    95 
       
    96 	/**
       
    97 	 * Input data
       
    98 	 *
       
    99 	 * @var string
       
   100 	 */
       
   101 	protected $data = '';
       
   102 
       
   103 	/**
       
   104 	 * Input data length (to avoid calling strlen() everytime this is needed)
       
   105 	 *
       
   106 	 * @var int
       
   107 	 */
       
   108 	protected $data_length = 0;
       
   109 
       
   110 	/**
       
   111 	 * Current position of the pointer
       
   112 	 *
       
   113 	 * @var int
       
   114 	 */
       
   115 	protected $position = 0;
       
   116 
       
   117 	/**
       
   118 	 * Name of the hedaer currently being parsed
       
   119 	 *
       
   120 	 * @var string
       
   121 	 */
       
   122 	protected $name = '';
       
   123 
       
   124 	/**
       
   125 	 * Value of the hedaer currently being parsed
       
   126 	 *
       
   127 	 * @var string
       
   128 	 */
       
   129 	protected $value = '';
       
   130 
       
   131 	/**
       
   132 	 * Create an instance of the class with the input data
       
   133 	 *
       
   134 	 * @param string $data Input data
       
   135 	 */
       
   136 	public function __construct($data)
       
   137 	{
       
   138 		$this->data = $data;
       
   139 		$this->data_length = strlen($this->data);
       
   140 	}
       
   141 
       
   142 	/**
       
   143 	 * Parse the input data
       
   144 	 *
       
   145 	 * @return bool true on success, false on failure
       
   146 	 */
       
   147 	public function parse()
       
   148 	{
       
   149 		while ($this->state && $this->state !== 'emit' && $this->has_data())
       
   150 		{
       
   151 			$state = $this->state;
       
   152 			$this->$state();
       
   153 		}
       
   154 		$this->data = '';
       
   155 		if ($this->state === 'emit' || $this->state === 'body')
       
   156 		{
       
   157 			return true;
       
   158 		}
       
   159 		else
       
   160 		{
       
   161 			$this->http_version = '';
       
   162 			$this->status_code = '';
       
   163 			$this->reason = '';
       
   164 			$this->headers = array();
       
   165 			$this->body = '';
       
   166 			return false;
       
   167 		}
       
   168 	}
       
   169 
       
   170 	/**
       
   171 	 * Check whether there is data beyond the pointer
       
   172 	 *
       
   173 	 * @return bool true if there is further data, false if not
       
   174 	 */
       
   175 	protected function has_data()
       
   176 	{
       
   177 		return (bool) ($this->position < $this->data_length);
       
   178 	}
       
   179 
       
   180 	/**
       
   181 	 * See if the next character is LWS
       
   182 	 *
       
   183 	 * @return bool true if the next character is LWS, false if not
       
   184 	 */
       
   185 	protected function is_linear_whitespace()
       
   186 	{
       
   187 		return (bool) ($this->data[$this->position] === "\x09"
       
   188 			|| $this->data[$this->position] === "\x20"
       
   189 			|| ($this->data[$this->position] === "\x0A"
       
   190 				&& isset($this->data[$this->position + 1])
       
   191 				&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20")));
       
   192 	}
       
   193 
       
   194 	/**
       
   195 	 * Parse the HTTP version
       
   196 	 */
       
   197 	protected function http_version()
       
   198 	{
       
   199 		if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
       
   200 		{
       
   201 			$len = strspn($this->data, '0123456789.', 5);
       
   202 			$this->http_version = substr($this->data, 5, $len);
       
   203 			$this->position += 5 + $len;
       
   204 			if (substr_count($this->http_version, '.') <= 1)
       
   205 			{
       
   206 				$this->http_version = (float) $this->http_version;
       
   207 				$this->position += strspn($this->data, "\x09\x20", $this->position);
       
   208 				$this->state = 'status';
       
   209 			}
       
   210 			else
       
   211 			{
       
   212 				$this->state = false;
       
   213 			}
       
   214 		}
       
   215 		else
       
   216 		{
       
   217 			$this->state = false;
       
   218 		}
       
   219 	}
       
   220 
       
   221 	/**
       
   222 	 * Parse the status code
       
   223 	 */
       
   224 	protected function status()
       
   225 	{
       
   226 		if ($len = strspn($this->data, '0123456789', $this->position))
       
   227 		{
       
   228 			$this->status_code = (int) substr($this->data, $this->position, $len);
       
   229 			$this->position += $len;
       
   230 			$this->state = 'reason';
       
   231 		}
       
   232 		else
       
   233 		{
       
   234 			$this->state = false;
       
   235 		}
       
   236 	}
       
   237 
       
   238 	/**
       
   239 	 * Parse the reason phrase
       
   240 	 */
       
   241 	protected function reason()
       
   242 	{
       
   243 		$len = strcspn($this->data, "\x0A", $this->position);
       
   244 		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
       
   245 		$this->position += $len + 1;
       
   246 		$this->state = 'new_line';
       
   247 	}
       
   248 
       
   249 	/**
       
   250 	 * Deal with a new line, shifting data around as needed
       
   251 	 */
       
   252 	protected function new_line()
       
   253 	{
       
   254 		$this->value = trim($this->value, "\x0D\x20");
       
   255 		if ($this->name !== '' && $this->value !== '')
       
   256 		{
       
   257 			$this->name = strtolower($this->name);
       
   258 			// We should only use the last Content-Type header. c.f. issue #1
       
   259 			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
       
   260 			{
       
   261 				$this->headers[$this->name] .= ', ' . $this->value;
       
   262 			}
       
   263 			else
       
   264 			{
       
   265 				$this->headers[$this->name] = $this->value;
       
   266 			}
       
   267 		}
       
   268 		$this->name = '';
       
   269 		$this->value = '';
       
   270 		if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A")
       
   271 		{
       
   272 			$this->position += 2;
       
   273 			$this->state = 'body';
       
   274 		}
       
   275 		elseif ($this->data[$this->position] === "\x0A")
       
   276 		{
       
   277 			$this->position++;
       
   278 			$this->state = 'body';
       
   279 		}
       
   280 		else
       
   281 		{
       
   282 			$this->state = 'name';
       
   283 		}
       
   284 	}
       
   285 
       
   286 	/**
       
   287 	 * Parse a header name
       
   288 	 */
       
   289 	protected function name()
       
   290 	{
       
   291 		$len = strcspn($this->data, "\x0A:", $this->position);
       
   292 		if (isset($this->data[$this->position + $len]))
       
   293 		{
       
   294 			if ($this->data[$this->position + $len] === "\x0A")
       
   295 			{
       
   296 				$this->position += $len;
       
   297 				$this->state = 'new_line';
       
   298 			}
       
   299 			else
       
   300 			{
       
   301 				$this->name = substr($this->data, $this->position, $len);
       
   302 				$this->position += $len + 1;
       
   303 				$this->state = 'value';
       
   304 			}
       
   305 		}
       
   306 		else
       
   307 		{
       
   308 			$this->state = false;
       
   309 		}
       
   310 	}
       
   311 
       
   312 	/**
       
   313 	 * Parse LWS, replacing consecutive LWS characters with a single space
       
   314 	 */
       
   315 	protected function linear_whitespace()
       
   316 	{
       
   317 		do
       
   318 		{
       
   319 			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
       
   320 			{
       
   321 				$this->position += 2;
       
   322 			}
       
   323 			elseif ($this->data[$this->position] === "\x0A")
       
   324 			{
       
   325 				$this->position++;
       
   326 			}
       
   327 			$this->position += strspn($this->data, "\x09\x20", $this->position);
       
   328 		} while ($this->has_data() && $this->is_linear_whitespace());
       
   329 		$this->value .= "\x20";
       
   330 	}
       
   331 
       
   332 	/**
       
   333 	 * See what state to move to while within non-quoted header values
       
   334 	 */
       
   335 	protected function value()
       
   336 	{
       
   337 		if ($this->is_linear_whitespace())
       
   338 		{
       
   339 			$this->linear_whitespace();
       
   340 		}
       
   341 		else
       
   342 		{
       
   343 			switch ($this->data[$this->position])
       
   344 			{
       
   345 				case '"':
       
   346 					// Workaround for ETags: we have to include the quotes as
       
   347 					// part of the tag.
       
   348 					if (strtolower($this->name) === 'etag')
       
   349 					{
       
   350 						$this->value .= '"';
       
   351 						$this->position++;
       
   352 						$this->state = 'value_char';
       
   353 						break;
       
   354 					}
       
   355 					$this->position++;
       
   356 					$this->state = 'quote';
       
   357 					break;
       
   358 
       
   359 				case "\x0A":
       
   360 					$this->position++;
       
   361 					$this->state = 'new_line';
       
   362 					break;
       
   363 
       
   364 				default:
       
   365 					$this->state = 'value_char';
       
   366 					break;
       
   367 			}
       
   368 		}
       
   369 	}
       
   370 
       
   371 	/**
       
   372 	 * Parse a header value while outside quotes
       
   373 	 */
       
   374 	protected function value_char()
       
   375 	{
       
   376 		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
       
   377 		$this->value .= substr($this->data, $this->position, $len);
       
   378 		$this->position += $len;
       
   379 		$this->state = 'value';
       
   380 	}
       
   381 
       
   382 	/**
       
   383 	 * See what state to move to while within quoted header values
       
   384 	 */
       
   385 	protected function quote()
       
   386 	{
       
   387 		if ($this->is_linear_whitespace())
       
   388 		{
       
   389 			$this->linear_whitespace();
       
   390 		}
       
   391 		else
       
   392 		{
       
   393 			switch ($this->data[$this->position])
       
   394 			{
       
   395 				case '"':
       
   396 					$this->position++;
       
   397 					$this->state = 'value';
       
   398 					break;
       
   399 
       
   400 				case "\x0A":
       
   401 					$this->position++;
       
   402 					$this->state = 'new_line';
       
   403 					break;
       
   404 
       
   405 				case '\\':
       
   406 					$this->position++;
       
   407 					$this->state = 'quote_escaped';
       
   408 					break;
       
   409 
       
   410 				default:
       
   411 					$this->state = 'quote_char';
       
   412 					break;
       
   413 			}
       
   414 		}
       
   415 	}
       
   416 
       
   417 	/**
       
   418 	 * Parse a header value while within quotes
       
   419 	 */
       
   420 	protected function quote_char()
       
   421 	{
       
   422 		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
       
   423 		$this->value .= substr($this->data, $this->position, $len);
       
   424 		$this->position += $len;
       
   425 		$this->state = 'value';
       
   426 	}
       
   427 
       
   428 	/**
       
   429 	 * Parse an escaped character within quotes
       
   430 	 */
       
   431 	protected function quote_escaped()
       
   432 	{
       
   433 		$this->value .= $this->data[$this->position];
       
   434 		$this->position++;
       
   435 		$this->state = 'quote';
       
   436 	}
       
   437 
       
   438 	/**
       
   439 	 * Parse the body
       
   440 	 */
       
   441 	protected function body()
       
   442 	{
       
   443 		$this->body = substr($this->data, $this->position);
       
   444 		if (!empty($this->headers['transfer-encoding']))
       
   445 		{
       
   446 			unset($this->headers['transfer-encoding']);
       
   447 			$this->state = 'chunked';
       
   448 		}
       
   449 		else
       
   450 		{
       
   451 			$this->state = 'emit';
       
   452 		}
       
   453 	}
       
   454 
       
   455 	/**
       
   456 	 * Parsed a "Transfer-Encoding: chunked" body
       
   457 	 */
       
   458 	protected function chunked()
       
   459 	{
       
   460 		if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body)))
       
   461 		{
       
   462 			$this->state = 'emit';
       
   463 			return;
       
   464 		}
       
   465 
       
   466 		$decoded = '';
       
   467 		$encoded = $this->body;
       
   468 
       
   469 		while (true)
       
   470 		{
       
   471 			$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches );
       
   472 			if (!$is_chunked)
       
   473 			{
       
   474 				// Looks like it's not chunked after all
       
   475 				$this->state = 'emit';
       
   476 				return;
       
   477 			}
       
   478 
       
   479 			$length = hexdec(trim($matches[1]));
       
   480 			if ($length === 0)
       
   481 			{
       
   482 				// Ignore trailer headers
       
   483 				$this->state = 'emit';
       
   484 				$this->body = $decoded;
       
   485 				return;
       
   486 			}
       
   487 
       
   488 			$chunk_length = strlen($matches[0]);
       
   489 			$decoded .= $part = substr($encoded, $chunk_length, $length);
       
   490 			$encoded = substr($encoded, $chunk_length + $length + 2);
       
   491 
       
   492 			if (trim($encoded) === '0' || empty($encoded))
       
   493 			{
       
   494 				$this->state = 'emit';
       
   495 				$this->body = $decoded;
       
   496 				return;
       
   497 			}
       
   498 		}
       
   499 	}
       
   500 }