Spaces:
Sleeping
Sleeping
/** | |
* SimplePie | |
* | |
* A PHP-Based RSS and Atom Feed Framework. | |
* Takes the hard work out of managing a complete RSS/Atom solution. | |
* | |
* Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors | |
* All rights reserved. | |
* | |
* Redistribution and use in source and binary forms, with or without modification, are | |
* permitted provided that the following conditions are met: | |
* | |
* * Redistributions of source code must retain the above copyright notice, this list of | |
* conditions and the following disclaimer. | |
* | |
* * Redistributions in binary form must reproduce the above copyright notice, this list | |
* of conditions and the following disclaimer in the documentation and/or other materials | |
* provided with the distribution. | |
* | |
* * Neither the name of the SimplePie Team nor the names of its contributors may be used | |
* to endorse or promote products derived from this software without specific prior | |
* written permission. | |
* | |
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS | |
* OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY | |
* AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS | |
* AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR | |
* CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR | |
* SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY | |
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR | |
* OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE | |
* POSSIBILITY OF SUCH DAMAGE. | |
* | |
* @package SimplePie | |
* @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue | |
* @author Ryan Parman | |
* @author Sam Sneddon | |
* @author Ryan McCue | |
* @link http://simplepie.org/ SimplePie | |
* @license http://www.opensource.org/licenses/bsd-license.php BSD License | |
*/ | |
/** | |
* HTTP Response Parser | |
* | |
* @package SimplePie | |
* @subpackage HTTP | |
*/ | |
class SimplePie_HTTP_Parser | |
{ | |
/** | |
* HTTP Version | |
* | |
* @var float | |
*/ | |
public $http_version = 0.0; | |
/** | |
* Status code | |
* | |
* @var int | |
*/ | |
public $status_code = 0; | |
/** | |
* Reason phrase | |
* | |
* @var string | |
*/ | |
public $reason = ''; | |
/** | |
* Key/value pairs of the headers | |
* | |
* @var array | |
*/ | |
public $headers = array(); | |
/** | |
* Body of the response | |
* | |
* @var string | |
*/ | |
public $body = ''; | |
/** | |
* Current state of the state machine | |
* | |
* @var string | |
*/ | |
protected $state = 'http_version'; | |
/** | |
* Input data | |
* | |
* @var string | |
*/ | |
protected $data = ''; | |
/** | |
* Input data length (to avoid calling strlen() everytime this is needed) | |
* | |
* @var int | |
*/ | |
protected $data_length = 0; | |
/** | |
* Current position of the pointer | |
* | |
* @var int | |
*/ | |
protected $position = 0; | |
/** | |
* Name of the hedaer currently being parsed | |
* | |
* @var string | |
*/ | |
protected $name = ''; | |
/** | |
* Value of the hedaer currently being parsed | |
* | |
* @var string | |
*/ | |
protected $value = ''; | |
/** | |
* Create an instance of the class with the input data | |
* | |
* @param string $data Input data | |
*/ | |
public function __construct($data) | |
{ | |
$this->data = $data; | |
$this->data_length = strlen($this->data); | |
} | |
/** | |
* Parse the input data | |
* | |
* @return bool true on success, false on failure | |
*/ | |
public function parse() | |
{ | |
while ($this->state && $this->state !== 'emit' && $this->has_data()) | |
{ | |
$state = $this->state; | |
$this->$state(); | |
} | |
$this->data = ''; | |
if ($this->state === 'emit' || $this->state === 'body') | |
{ | |
return true; | |
} | |
$this->http_version = ''; | |
$this->status_code = ''; | |
$this->reason = ''; | |
$this->headers = array(); | |
$this->body = ''; | |
return false; | |
} | |
/** | |
* Check whether there is data beyond the pointer | |
* | |
* @return bool true if there is further data, false if not | |
*/ | |
protected function has_data() | |
{ | |
return (bool) ($this->position < $this->data_length); | |
} | |
/** | |
* See if the next character is LWS | |
* | |
* @return bool true if the next character is LWS, false if not | |
*/ | |
protected function is_linear_whitespace() | |
{ | |
return (bool) ($this->data[$this->position] === "\x09" | |
|| $this->data[$this->position] === "\x20" | |
|| ($this->data[$this->position] === "\x0A" | |
&& isset($this->data[$this->position + 1]) | |
&& ($this->data[$this->position + 1] === "\x09" || $this->data[$this->position + 1] === "\x20"))); | |
} | |
/** | |
* Parse the HTTP version | |
*/ | |
protected function http_version() | |
{ | |
if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/') | |
{ | |
$len = strspn($this->data, '0123456789.', 5); | |
$this->http_version = substr($this->data, 5, $len); | |
$this->position += 5 + $len; | |
if (substr_count($this->http_version, '.') <= 1) | |
{ | |
$this->http_version = (float) $this->http_version; | |
$this->position += strspn($this->data, "\x09\x20", $this->position); | |
$this->state = 'status'; | |
} | |
else | |
{ | |
$this->state = false; | |
} | |
} | |
else | |
{ | |
$this->state = false; | |
} | |
} | |
/** | |
* Parse the status code | |
*/ | |
protected function status() | |
{ | |
if ($len = strspn($this->data, '0123456789', $this->position)) | |
{ | |
$this->status_code = (int) substr($this->data, $this->position, $len); | |
$this->position += $len; | |
$this->state = 'reason'; | |
} | |
else | |
{ | |
$this->state = false; | |
} | |
} | |
/** | |
* Parse the reason phrase | |
*/ | |
protected function reason() | |
{ | |
$len = strcspn($this->data, "\x0A", $this->position); | |
$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20"); | |
$this->position += $len + 1; | |
$this->state = 'new_line'; | |
} | |
/** | |
* Deal with a new line, shifting data around as needed | |
*/ | |
protected function new_line() | |
{ | |
$this->value = trim($this->value, "\x0D\x20"); | |
if ($this->name !== '' && $this->value !== '') | |
{ | |
$this->name = strtolower($this->name); | |
// We should only use the last Content-Type header. c.f. issue #1 | |
if (isset($this->headers[$this->name]) && $this->name !== 'content-type') | |
{ | |
$this->headers[$this->name] .= ', ' . $this->value; | |
} | |
else | |
{ | |
$this->headers[$this->name] = $this->value; | |
} | |
} | |
$this->name = ''; | |
$this->value = ''; | |
if (substr($this->data[$this->position], 0, 2) === "\x0D\x0A") | |
{ | |
$this->position += 2; | |
$this->state = 'body'; | |
} | |
elseif ($this->data[$this->position] === "\x0A") | |
{ | |
$this->position++; | |
$this->state = 'body'; | |
} | |
else | |
{ | |
$this->state = 'name'; | |
} | |
} | |
/** | |
* Parse a header name | |
*/ | |
protected function name() | |
{ | |
$len = strcspn($this->data, "\x0A:", $this->position); | |
if (isset($this->data[$this->position + $len])) | |
{ | |
if ($this->data[$this->position + $len] === "\x0A") | |
{ | |
$this->position += $len; | |
$this->state = 'new_line'; | |
} | |
else | |
{ | |
$this->name = substr($this->data, $this->position, $len); | |
$this->position += $len + 1; | |
$this->state = 'value'; | |
} | |
} | |
else | |
{ | |
$this->state = false; | |
} | |
} | |
/** | |
* Parse LWS, replacing consecutive LWS characters with a single space | |
*/ | |
protected function linear_whitespace() | |
{ | |
do | |
{ | |
if (substr($this->data, $this->position, 2) === "\x0D\x0A") | |
{ | |
$this->position += 2; | |
} | |
elseif ($this->data[$this->position] === "\x0A") | |
{ | |
$this->position++; | |
} | |
$this->position += strspn($this->data, "\x09\x20", $this->position); | |
} while ($this->has_data() && $this->is_linear_whitespace()); | |
$this->value .= "\x20"; | |
} | |
/** | |
* See what state to move to while within non-quoted header values | |
*/ | |
protected function value() | |
{ | |
if ($this->is_linear_whitespace()) | |
{ | |
$this->linear_whitespace(); | |
} | |
else | |
{ | |
switch ($this->data[$this->position]) | |
{ | |
case '"': | |
// Workaround for ETags: we have to include the quotes as | |
// part of the tag. | |
if (strtolower($this->name) === 'etag') | |
{ | |
$this->value .= '"'; | |
$this->position++; | |
$this->state = 'value_char'; | |
break; | |
} | |
$this->position++; | |
$this->state = 'quote'; | |
break; | |
case "\x0A": | |
$this->position++; | |
$this->state = 'new_line'; | |
break; | |
default: | |
$this->state = 'value_char'; | |
break; | |
} | |
} | |
} | |
/** | |
* Parse a header value while outside quotes | |
*/ | |
protected function value_char() | |
{ | |
$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position); | |
$this->value .= substr($this->data, $this->position, $len); | |
$this->position += $len; | |
$this->state = 'value'; | |
} | |
/** | |
* See what state to move to while within quoted header values | |
*/ | |
protected function quote() | |
{ | |
if ($this->is_linear_whitespace()) | |
{ | |
$this->linear_whitespace(); | |
} | |
else | |
{ | |
switch ($this->data[$this->position]) | |
{ | |
case '"': | |
$this->position++; | |
$this->state = 'value'; | |
break; | |
case "\x0A": | |
$this->position++; | |
$this->state = 'new_line'; | |
break; | |
case '\\': | |
$this->position++; | |
$this->state = 'quote_escaped'; | |
break; | |
default: | |
$this->state = 'quote_char'; | |
break; | |
} | |
} | |
} | |
/** | |
* Parse a header value while within quotes | |
*/ | |
protected function quote_char() | |
{ | |
$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position); | |
$this->value .= substr($this->data, $this->position, $len); | |
$this->position += $len; | |
$this->state = 'value'; | |
} | |
/** | |
* Parse an escaped character within quotes | |
*/ | |
protected function quote_escaped() | |
{ | |
$this->value .= $this->data[$this->position]; | |
$this->position++; | |
$this->state = 'quote'; | |
} | |
/** | |
* Parse the body | |
*/ | |
protected function body() | |
{ | |
$this->body = substr($this->data, $this->position); | |
if (!empty($this->headers['transfer-encoding'])) | |
{ | |
unset($this->headers['transfer-encoding']); | |
$this->state = 'chunked'; | |
} | |
else | |
{ | |
$this->state = 'emit'; | |
} | |
} | |
/** | |
* Parsed a "Transfer-Encoding: chunked" body | |
*/ | |
protected function chunked() | |
{ | |
if (!preg_match('/^([0-9a-f]+)[^\r\n]*\r\n/i', trim($this->body))) | |
{ | |
$this->state = 'emit'; | |
return; | |
} | |
$decoded = ''; | |
$encoded = $this->body; | |
while (true) | |
{ | |
$is_chunked = (bool) preg_match( '/^([0-9a-f]+)[^\r\n]*\r\n/i', $encoded, $matches ); | |
if (!$is_chunked) | |
{ | |
// Looks like it's not chunked after all | |
$this->state = 'emit'; | |
return; | |
} | |
$length = hexdec(trim($matches[1])); | |
if ($length === 0) | |
{ | |
// Ignore trailer headers | |
$this->state = 'emit'; | |
$this->body = $decoded; | |
return; | |
} | |
$chunk_length = strlen($matches[0]); | |
$decoded .= $part = substr($encoded, $chunk_length, $length); | |
$encoded = substr($encoded, $chunk_length + $length + 2); | |
if (trim($encoded) === '0' || empty($encoded)) | |
{ | |
$this->state = 'emit'; | |
$this->body = $decoded; | |
return; | |
} | |
} | |
} | |
/** | |
* Prepare headers (take care of proxies headers) | |
* | |
* @param string $headers Raw headers | |
* @param integer $count Redirection count. Default to 1. | |
* | |
* @return string | |
*/ | |
static public function prepareHeaders($headers, $count = 1) | |
{ | |
$data = explode("\r\n\r\n", $headers, $count); | |
$data = array_pop($data); | |
if (false !== stripos($data, "HTTP/1.0 200 Connection established\r\n")) { | |
$exploded = explode("\r\n\r\n", $data, 2); | |
$data = end($exploded); | |
} | |
if (false !== stripos($data, "HTTP/1.1 200 Connection established\r\n")) { | |
$exploded = explode("\r\n\r\n", $data, 2); | |
$data = end($exploded); | |
} | |
return $data; | |
} | |
} | |