521 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
			
		
		
	
	
			521 lines
		
	
	
		
			11 KiB
		
	
	
	
		
			PHP
		
	
	
	
	
	
<?php
 | 
						|
/**
 | 
						|
 * SimplePie
 | 
						|
 *
 | 
						|
 * A PHP-Based RSS and Atom Feed Framework.
 | 
						|
 * Takes the hard work out of managing a complete RSS/Atom solution.
 | 
						|
 *
 | 
						|
 * Copyright (c) 2004-2016, Ryan Parman, Sam Sneddon, Ryan McCue, and contributors
 | 
						|
 * All rights reserved.
 | 
						|
 *
 | 
						|
 * Redistribution and use in source and binary forms, with or without modification, are
 | 
						|
 * permitted provided that the following conditions are met:
 | 
						|
 *
 | 
						|
 * 	* Redistributions of source code must retain the above copyright notice, this list of
 | 
						|
 * 	  conditions and the following disclaimer.
 | 
						|
 *
 | 
						|
 * 	* Redistributions in binary form must reproduce the above copyright notice, this list
 | 
						|
 * 	  of conditions and the following disclaimer in the documentation and/or other materials
 | 
						|
 * 	  provided with the distribution.
 | 
						|
 *
 | 
						|
 * 	* Neither the name of the SimplePie Team nor the names of its contributors may be used
 | 
						|
 * 	  to endorse or promote products derived from this software without specific prior
 | 
						|
 * 	  written permission.
 | 
						|
 *
 | 
						|
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND ANY EXPRESS
 | 
						|
 * OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY
 | 
						|
 * AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDERS
 | 
						|
 * AND CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 | 
						|
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR
 | 
						|
 * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
 | 
						|
 * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR
 | 
						|
 * OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 | 
						|
 * POSSIBILITY OF SUCH DAMAGE.
 | 
						|
 *
 | 
						|
 * @package SimplePie
 | 
						|
 * @copyright 2004-2016 Ryan Parman, Sam Sneddon, Ryan McCue
 | 
						|
 * @author Ryan Parman
 | 
						|
 * @author Sam Sneddon
 | 
						|
 * @author Ryan McCue
 | 
						|
 * @link http://simplepie.org/ SimplePie
 | 
						|
 * @license http://www.opensource.org/licenses/bsd-license.php BSD License
 | 
						|
 */
 | 
						|
 | 
						|
 | 
						|
/**
 | 
						|
 * HTTP Response Parser
 | 
						|
 *
 | 
						|
 * @package SimplePie
 | 
						|
 * @subpackage HTTP
 | 
						|
 */
 | 
						|
class SimplePie_HTTP_Parser
{
	/**
	 * HTTP Version
	 *
	 * @var float
	 */
	public $http_version = 0.0;

	/**
	 * Status code
	 *
	 * @var int
	 */
	public $status_code = 0;

	/**
	 * Reason phrase
	 *
	 * @var string
	 */
	public $reason = '';

	/**
	 * Key/value pairs of the headers (keys are lower-cased header names)
	 *
	 * @var array
	 */
	public $headers = array();

	/**
	 * Body of the response
	 *
	 * @var string
	 */
	public $body = '';

	/**
	 * Current state of the state machine.
	 *
	 * Holds the name of the method that handles the next step, 'emit' once
	 * parsing has finished, or false when the input was rejected.
	 *
	 * @var string|false
	 */
	protected $state = 'http_version';

	/**
	 * Input data
	 *
	 * @var string
	 */
	protected $data = '';

	/**
	 * Input data length (to avoid calling strlen() every time this is needed)
	 *
	 * @var int
	 */
	protected $data_length = 0;

	/**
	 * Current position of the pointer
	 *
	 * @var int
	 */
	protected $position = 0;

	/**
	 * Name of the header currently being parsed
	 *
	 * @var string
	 */
	protected $name = '';

	/**
	 * Value of the header currently being parsed
	 *
	 * @var string
	 */
	protected $value = '';

	/**
	 * Create an instance of the class with the input data
	 *
	 * @param string $data Input data
	 */
	public function __construct($data)
	{
		$this->data = $data;
		$this->data_length = strlen($this->data);
	}

	/**
	 * Parse the input data
	 *
	 * Runs the state machine until it reaches 'emit', fails (state false) or
	 * runs out of input. On failure every public result property is reset;
	 * note that $http_version and $status_code are reset to empty strings.
	 *
	 * @return bool true on success, false on failure
	 */
	public function parse()
	{
		while ($this->state && $this->state !== 'emit' && $this->has_data())
		{
			// Each state is the name of the method that handles it.
			$state = $this->state;
			$this->$state();
		}

		// The raw input is no longer needed once the loop has finished.
		$this->data = '';

		// 'body' is also a success: the input ended right after the headers.
		if ($this->state === 'emit' || $this->state === 'body')
		{
			return true;
		}

		$this->http_version = '';
		$this->status_code = '';
		$this->reason = '';
		$this->headers = array();
		$this->body = '';
		return false;
	}

	/**
	 * Check whether there is data beyond the pointer
	 *
	 * @return bool true if there is further data, false if not
	 */
	protected function has_data()
	{
		return $this->position < $this->data_length;
	}

	/**
	 * See if the next character is LWS
	 *
	 * A tab or a space counts as LWS, and so does a line feed that is
	 * immediately followed by a tab or a space (a folded header line).
	 *
	 * @return bool true if the next character is LWS, false if not
	 */
	protected function is_linear_whitespace()
	{
		$current = $this->data[$this->position];
		if ($current === "\x09" || $current === "\x20")
		{
			return true;
		}

		if ($current !== "\x0A" || !isset($this->data[$this->position + 1]))
		{
			return false;
		}

		$next = $this->data[$this->position + 1];
		return $next === "\x09" || $next === "\x20";
	}

	/**
	 * Parse the HTTP version
	 *
	 * Requires the input to contain at least one line feed and to start with
	 * "HTTP/" (case-insensitive); otherwise parsing fails.
	 */
	protected function http_version()
	{
		if (strpos($this->data, "\x0A") !== false && strtoupper(substr($this->data, 0, 5)) === 'HTTP/')
		{
			$len = strspn($this->data, '0123456789.', 5);
			$this->http_version = substr($this->data, 5, $len);
			$this->position += 5 + $len;

			// More than one dot cannot be a valid version number.
			if (substr_count($this->http_version, '.') <= 1)
			{
				$this->http_version = (float) $this->http_version;
				// Skip the whitespace separating the version from the status code.
				$this->position += strspn($this->data, "\x09\x20", $this->position);
				$this->state = 'status';
			}
			else
			{
				$this->state = false;
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the status code
	 *
	 * Fails when no digit is found at the current position.
	 */
	protected function status()
	{
		$len = strspn($this->data, '0123456789', $this->position);
		if ($len)
		{
			$this->status_code = (int) substr($this->data, $this->position, $len);
			$this->position += $len;
			$this->state = 'reason';
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse the reason phrase
	 *
	 * Takes everything up to the next line feed, with surrounding tabs,
	 * spaces and carriage returns removed.
	 */
	protected function reason()
	{
		$len = strcspn($this->data, "\x0A", $this->position);
		$this->reason = trim(substr($this->data, $this->position, $len), "\x09\x0D\x20");
		// "+ 1" steps over the line feed itself.
		$this->position += $len + 1;
		$this->state = 'new_line';
	}

	/**
	 * Deal with a new line, shifting data around as needed
	 *
	 * Stores the header collected so far (if any), then decides whether the
	 * header section has ended (blank line) or another header follows.
	 */
	protected function new_line()
	{
		$this->value = trim($this->value, "\x0D\x20");
		if ($this->name !== '' && $this->value !== '')
		{
			$this->name = strtolower($this->name);
			// We should only use the last Content-Type header. c.f. issue #1
			if (isset($this->headers[$this->name]) && $this->name !== 'content-type')
			{
				// Repeated headers are merged into a comma separated list.
				$this->headers[$this->name] .= ', ' . $this->value;
			}
			else
			{
				$this->headers[$this->name] = $this->value;
			}
		}
		$this->name = '';
		$this->value = '';

		// A blank line (CRLF or bare LF) terminates the header section.
		// The CRLF test must look at two bytes of the input, not at the
		// single character under the pointer.
		if (substr($this->data, $this->position, 2) === "\x0D\x0A")
		{
			$this->position += 2;
			$this->state = 'body';
		}
		elseif ($this->data[$this->position] === "\x0A")
		{
			$this->position++;
			$this->state = 'body';
		}
		else
		{
			$this->state = 'name';
		}
	}

	/**
	 * Parse a header name
	 *
	 * Reads up to the next colon or line feed. Fails when the input ends
	 * before either of them is found.
	 */
	protected function name()
	{
		$len = strcspn($this->data, "\x0A:", $this->position);
		if (isset($this->data[$this->position + $len]))
		{
			if ($this->data[$this->position + $len] === "\x0A")
			{
				// No colon on this line: leave the pointer on the line feed
				// and let new_line() deal with it.
				$this->position += $len;
				$this->state = 'new_line';
			}
			else
			{
				$this->name = substr($this->data, $this->position, $len);
				// "+ 1" steps over the colon.
				$this->position += $len + 1;
				$this->state = 'value';
			}
		}
		else
		{
			$this->state = false;
		}
	}

	/**
	 * Parse LWS, replacing consecutive LWS characters with a single space
	 */
	protected function linear_whitespace()
	{
		do
		{
			if (substr($this->data, $this->position, 2) === "\x0D\x0A")
			{
				$this->position += 2;
			}
			elseif ($this->data[$this->position] === "\x0A")
			{
				$this->position++;
			}
			$this->position += strspn($this->data, "\x09\x20", $this->position);
		} while ($this->has_data() && $this->is_linear_whitespace());

		$this->value .= "\x20";
	}

	/**
	 * See what state to move to while within non-quoted header values
	 */
	protected function value()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Workaround for ETags: we have to include the quotes as
				// part of the tag.
				if (strtolower($this->name) === 'etag')
				{
					$this->value .= '"';
					$this->position++;
					$this->state = 'value_char';
					break;
				}
				$this->position++;
				$this->state = 'quote';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			default:
				$this->state = 'value_char';
				break;
		}
	}

	/**
	 * Parse a header value while outside quotes
	 *
	 * Copies everything up to the next tab, space, line feed or double quote.
	 */
	protected function value_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		$this->state = 'value';
	}

	/**
	 * See what state to move to while within quoted header values
	 */
	protected function quote()
	{
		if ($this->is_linear_whitespace())
		{
			$this->linear_whitespace();
			return;
		}

		switch ($this->data[$this->position])
		{
			case '"':
				// Closing quote: back to unquoted value parsing.
				$this->position++;
				$this->state = 'value';
				break;

			case "\x0A":
				$this->position++;
				$this->state = 'new_line';
				break;

			case '\\':
				$this->position++;
				$this->state = 'quote_escaped';
				break;

			default:
				$this->state = 'quote_char';
				break;
		}
	}

	/**
	 * Parse a header value while within quotes
	 *
	 * Copies everything up to the next tab, space, line feed, double quote or
	 * backslash, then hands control back to quote() so that the closing quote
	 * and backslash escapes are interpreted in the quoted context.
	 */
	protected function quote_char()
	{
		$len = strcspn($this->data, "\x09\x20\x0A\"\\", $this->position);
		$this->value .= substr($this->data, $this->position, $len);
		$this->position += $len;
		// We are still inside the quoted string, so stay in the quote states.
		$this->state = 'quote';
	}

	/**
	 * Parse an escaped character within quotes
	 *
	 * The character following the backslash is copied verbatim.
	 */
	protected function quote_escaped()
	{
		$this->value .= $this->data[$this->position];
		$this->position++;
		$this->state = 'quote';
	}

	/**
	 * Parse the body
	 *
	 * Everything after the header section becomes the body. When a
	 * Transfer-Encoding header is present it is removed and the body is
	 * handed to chunked() for decoding.
	 */
	protected function body()
	{
		$this->body = substr($this->data, $this->position);
		if (!empty($this->headers['transfer-encoding']))
		{
			unset($this->headers['transfer-encoding']);
			$this->state = 'chunked';
		}
		else
		{
			$this->state = 'emit';
		}
	}

	/**
	 * Parse a "Transfer-Encoding: chunked" body
	 *
	 * If the body does not look chunked it is emitted untouched. Trailer
	 * headers after the terminating zero-length chunk are ignored.
	 */
	protected function chunked()
	{
		// Chunk header: hexadecimal size, optional extensions, CRLF.
		$chunk_header = '/^([0-9a-f]+)[^\r\n]*\r\n/i';

		if (!preg_match($chunk_header, trim($this->body)))
		{
			$this->state = 'emit';
			return;
		}

		$decoded = '';
		$encoded = $this->body;

		while (true)
		{
			if (!preg_match($chunk_header, $encoded, $matches))
			{
				// Looks like it's not chunked after all
				$this->state = 'emit';
				return;
			}

			$length = hexdec(trim($matches[1]));
			if (!is_int($length))
			{
				// hexdec() returns a float when the size does not fit in an
				// integer. Such a size cannot describe a real chunk and must
				// not be passed on to substr(); keep the body undecoded.
				$this->state = 'emit';
				return;
			}

			if ($length === 0)
			{
				// Ignore trailer headers
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}

			$chunk_length = strlen($matches[0]);
			$decoded .= substr($encoded, $chunk_length, $length);
			// "+ 2" steps over the CRLF that follows the chunk data.
			$encoded = substr($encoded, $chunk_length + $length + 2);

			if (trim($encoded) === '0' || empty($encoded))
			{
				$this->state = 'emit';
				$this->body = $decoded;
				return;
			}
		}
	}

	/**
	 * Prepare headers (take care of proxies headers)
	 *
	 * Splits the raw data on blank lines into at most $count parts, keeps
	 * the last part and strips a leading "200 Connection established"
	 * response from it.
	 *
	 * @param string  $headers Raw headers
	 * @param integer $count   Redirection count. Default to 1.
	 *
	 * @return string
	 */
	public static function prepareHeaders($headers, $count = 1)
	{
		$parts = explode("\r\n\r\n", $headers, $count);
		$data = array_pop($parts);

		$proxy_preambles = array(
			"HTTP/1.0 200 Connection established\r\n",
			"HTTP/1.1 200 Connection established\r\n",
		);
		foreach ($proxy_preambles as $preamble)
		{
			if (false !== stripos($data, $preamble))
			{
				$exploded = explode("\r\n\r\n", $data, 2);
				$data = end($exploded);
			}
		}

		return $data;
	}
}
 |