web/Zend/Feed.php
changeset 0 4eba9c11703f
equal deleted inserted replaced
-1:000000000000 0:4eba9c11703f
       
     1 <?php
       
     2 
       
     3 /**
       
     4  * Zend Framework
       
     5  *
       
     6  * LICENSE
       
     7  *
       
     8  * This source file is subject to the new BSD license that is bundled
       
     9  * with this package in the file LICENSE.txt.
       
    10  * It is also available through the world-wide-web at this URL:
       
    11  * http://framework.zend.com/license/new-bsd
       
    12  * If you did not receive a copy of the license and are unable to
       
    13  * obtain it through the world-wide-web, please send an email
       
    14  * to license@zend.com so we can send you a copy immediately.
       
    15  *
       
    16  * @category   Zend
       
    17  * @package    Zend_Feed
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: Feed.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 
       
    24 /**
       
    25  * Feed utility class
       
    26  *
       
    27  * Base Zend_Feed class, containing constants and the Zend_Http_Client instance
       
    28  * accessor.
       
    29  *
       
    30  * @category   Zend
       
    31  * @package    Zend_Feed
       
    32  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    33  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    34  */
       
    35 class Zend_Feed
       
    36 {
       
    37 
       
    38     /**
       
    39      * HTTP client object to use for retrieving feeds
       
    40      *
       
    41      * @var Zend_Http_Client
       
    42      */
       
    43     protected static $_httpClient = null;
       
    44 
       
    45     /**
       
    46      * Override HTTP PUT and DELETE request methods?
       
    47      *
       
    48      * @var boolean
       
    49      */
       
    50     protected static $_httpMethodOverride = false;
       
    51 
       
    52     /**
       
    53      * @var array
       
    54      */
       
    55     protected static $_namespaces = array(
       
    56         'opensearch' => 'http://a9.com/-/spec/opensearchrss/1.0/',
       
    57         'atom'       => 'http://www.w3.org/2005/Atom',
       
    58         'rss'        => 'http://blogs.law.harvard.edu/tech/rss',
       
    59     );
       
    60 
       
    61 
       
    62     /**
       
    63      * Set the HTTP client instance
       
    64      *
       
    65      * Sets the HTTP client object to use for retrieving the feeds.
       
    66      *
       
    67      * @param  Zend_Http_Client $httpClient
       
    68      * @return void
       
    69      */
       
    70     public static function setHttpClient(Zend_Http_Client $httpClient)
       
    71     {
       
    72         self::$_httpClient = $httpClient;
       
    73     }
       
    74 
       
    75 
       
    76     /**
       
    77      * Gets the HTTP client object. If none is set, a new Zend_Http_Client will be used.
       
    78      *
       
    79      * @return Zend_Http_Client_Abstract
       
    80      */
       
    81     public static function getHttpClient()
       
    82     {
       
    83         if (!self::$_httpClient instanceof Zend_Http_Client) {
       
    84             /**
       
    85              * @see Zend_Http_Client
       
    86              */
       
    87             require_once 'Zend/Http/Client.php';
       
    88             self::$_httpClient = new Zend_Http_Client();
       
    89         }
       
    90 
       
    91         return self::$_httpClient;
       
    92     }
       
    93 
       
    94 
       
    95     /**
       
    96      * Toggle using POST instead of PUT and DELETE HTTP methods
       
    97      *
       
    98      * Some feed implementations do not accept PUT and DELETE HTTP
       
    99      * methods, or they can't be used because of proxies or other
       
   100      * measures. This allows turning on using POST where PUT and
       
   101      * DELETE would normally be used; in addition, an
       
   102      * X-Method-Override header will be sent with a value of PUT or
       
   103      * DELETE as appropriate.
       
   104      *
       
   105      * @param  boolean $override Whether to override PUT and DELETE.
       
   106      * @return void
       
   107      */
       
   108     public static function setHttpMethodOverride($override = true)
       
   109     {
       
   110         self::$_httpMethodOverride = $override;
       
   111     }
       
   112 
       
   113 
       
   114     /**
       
   115      * Get the HTTP override state
       
   116      *
       
   117      * @return boolean
       
   118      */
       
   119     public static function getHttpMethodOverride()
       
   120     {
       
   121         return self::$_httpMethodOverride;
       
   122     }
       
   123 
       
   124 
       
   125     /**
       
   126      * Get the full version of a namespace prefix
       
   127      *
       
   128      * Looks up a prefix (atom:, etc.) in the list of registered
       
   129      * namespaces and returns the full namespace URI if
       
   130      * available. Returns the prefix, unmodified, if it's not
       
   131      * registered.
       
   132      *
       
   133      * @return string
       
   134      */
       
   135     public static function lookupNamespace($prefix)
       
   136     {
       
   137         return isset(self::$_namespaces[$prefix]) ?
       
   138             self::$_namespaces[$prefix] :
       
   139             $prefix;
       
   140     }
       
   141 
       
   142 
       
   143     /**
       
   144      * Add a namespace and prefix to the registered list
       
   145      *
       
   146      * Takes a prefix and a full namespace URI and adds them to the
       
   147      * list of registered namespaces for use by
       
   148      * Zend_Feed::lookupNamespace().
       
   149      *
       
   150      * @param  string $prefix The namespace prefix
       
   151      * @param  string $namespaceURI The full namespace URI
       
   152      * @return void
       
   153      */
       
   154     public static function registerNamespace($prefix, $namespaceURI)
       
   155     {
       
   156         self::$_namespaces[$prefix] = $namespaceURI;
       
   157     }
       
   158 
       
   159 
       
   160     /**
       
   161      * Imports a feed located at $uri.
       
   162      *
       
   163      * @param  string $uri
       
   164      * @throws Zend_Feed_Exception
       
   165      * @return Zend_Feed_Abstract
       
   166      */
       
   167     public static function import($uri)
       
   168     {
       
   169         $client = self::getHttpClient();
       
   170         $client->setUri($uri);
       
   171         $response = $client->request('GET');
       
   172         if ($response->getStatus() !== 200) {
       
   173             /**
       
   174              * @see Zend_Feed_Exception
       
   175              */
       
   176             require_once 'Zend/Feed/Exception.php';
       
   177             throw new Zend_Feed_Exception('Feed failed to load, got response code ' . $response->getStatus());
       
   178         }
       
   179         $feed = $response->getBody();
       
   180         return self::importString($feed);
       
   181     }
       
   182 
       
   183 
       
   184     /**
       
   185      * Imports a feed represented by $string.
       
   186      *
       
   187      * @param  string $string
       
   188      * @throws Zend_Feed_Exception
       
   189      * @return Zend_Feed_Abstract
       
   190      */
       
   191     public static function importString($string)
       
   192     {
       
   193         // Load the feed as an XML DOMDocument object
       
   194         $libxml_errflag = libxml_use_internal_errors(true);
       
   195         $doc = new DOMDocument;
       
   196         if (trim($string) == '') {
       
   197             require_once 'Zend/Feed/Exception.php';
       
   198             throw new Zend_Feed_Exception('Document/string being imported'
       
   199             . ' is an Empty string or comes from an empty HTTP response');
       
   200         }
       
   201         $status = $doc->loadXML($string);
       
   202         libxml_use_internal_errors($libxml_errflag);
       
   203 
       
   204 
       
   205         if (!$status) {
       
   206             // prevent the class to generate an undefined variable notice (ZF-2590)
       
   207             // Build error message
       
   208             $error = libxml_get_last_error();
       
   209             if ($error && $error->message) {
       
   210                 $errormsg = "DOMDocument cannot parse XML: {$error->message}";
       
   211             } else {
       
   212                 $errormsg = "DOMDocument cannot parse XML";
       
   213             }
       
   214 
       
   215 
       
   216             /**
       
   217              * @see Zend_Feed_Exception
       
   218              */
       
   219             require_once 'Zend/Feed/Exception.php';
       
   220             throw new Zend_Feed_Exception($errormsg);
       
   221         }
       
   222 
       
   223         // Try to find the base feed element or a single <entry> of an Atom feed
       
   224         if ($doc->getElementsByTagName('feed')->item(0) ||
       
   225             $doc->getElementsByTagName('entry')->item(0)) {
       
   226             /**
       
   227              * @see Zend_Feed_Atom
       
   228              */
       
   229             require_once 'Zend/Feed/Atom.php';
       
   230             // return a newly created Zend_Feed_Atom object
       
   231             return new Zend_Feed_Atom(null, $string);
       
   232         }
       
   233 
       
   234         // Try to find the base feed element of an RSS feed
       
   235         if ($doc->getElementsByTagName('channel')->item(0)) {
       
   236             /**
       
   237              * @see Zend_Feed_Rss
       
   238              */
       
   239             require_once 'Zend/Feed/Rss.php';
       
   240             // return a newly created Zend_Feed_Rss object
       
   241             return new Zend_Feed_Rss(null, $string);
       
   242         }
       
   243 
       
   244         // $string does not appear to be a valid feed of the supported types
       
   245         /**
       
   246          * @see Zend_Feed_Exception
       
   247          */
       
   248         require_once 'Zend/Feed/Exception.php';
       
   249         throw new Zend_Feed_Exception('Invalid or unsupported feed format');
       
   250     }
       
   251 
       
   252 
       
   253     /**
       
   254      * Imports a feed from a file located at $filename.
       
   255      *
       
   256      * @param  string $filename
       
   257      * @throws Zend_Feed_Exception
       
   258      * @return Zend_Feed_Abstract
       
   259      */
       
   260     public static function importFile($filename)
       
   261     {
       
   262         @ini_set('track_errors', 1);
       
   263         $feed = @file_get_contents($filename);
       
   264         @ini_restore('track_errors');
       
   265         if ($feed === false) {
       
   266             /**
       
   267              * @see Zend_Feed_Exception
       
   268              */
       
   269             require_once 'Zend/Feed/Exception.php';
       
   270             throw new Zend_Feed_Exception("File could not be loaded: $php_errormsg");
       
   271         }
       
   272         return self::importString($feed);
       
   273     }
       
   274 
       
   275 
       
   276     /**
       
   277      * Attempts to find feeds at $uri referenced by <link ... /> tags. Returns an
       
   278      * array of the feeds referenced at $uri.
       
   279      *
       
   280      * @todo Allow findFeeds() to follow one, but only one, code 302.
       
   281      *
       
   282      * @param  string $uri
       
   283      * @throws Zend_Feed_Exception
       
   284      * @return array
       
   285      */
       
   286     public static function findFeeds($uri)
       
   287     {
       
   288         // Get the HTTP response from $uri and save the contents
       
   289         $client = self::getHttpClient();
       
   290         $client->setUri($uri);
       
   291         $response = $client->request();
       
   292         if ($response->getStatus() !== 200) {
       
   293             /**
       
   294              * @see Zend_Feed_Exception
       
   295              */
       
   296             require_once 'Zend/Feed/Exception.php';
       
   297             throw new Zend_Feed_Exception("Failed to access $uri, got response code " . $response->getStatus());
       
   298         }
       
   299         $contents = $response->getBody();
       
   300 
       
   301         // Parse the contents for appropriate <link ... /> tags
       
   302         @ini_set('track_errors', 1);
       
   303         $pattern = '~(<link[^>]+)/?>~i';
       
   304         $result = @preg_match_all($pattern, $contents, $matches);
       
   305         @ini_restore('track_errors');
       
   306         if ($result === false) {
       
   307             /**
       
   308              * @see Zend_Feed_Exception
       
   309              */
       
   310             require_once 'Zend/Feed/Exception.php';
       
   311             throw new Zend_Feed_Exception("Internal error: $php_errormsg");
       
   312         }
       
   313 
       
   314         // Try to fetch a feed for each link tag that appears to refer to a feed
       
   315         $feeds = array();
       
   316         if (isset($matches[1]) && count($matches[1]) > 0) {
       
   317             foreach ($matches[1] as $link) {
       
   318                 // force string to be an utf-8 one
       
   319                 if (!mb_check_encoding($link, 'UTF-8')) {
       
   320                     $link = mb_convert_encoding($link, 'UTF-8');
       
   321                 }
       
   322                 $xml = @simplexml_load_string(rtrim($link, ' /') . ' />');
       
   323                 if ($xml === false) {
       
   324                     continue;
       
   325                 }
       
   326                 $attributes = $xml->attributes();
       
   327                 if (!isset($attributes['rel']) || !@preg_match('~^(?:alternate|service\.feed)~i', $attributes['rel'])) {
       
   328                     continue;
       
   329                 }
       
   330                 if (!isset($attributes['type']) ||
       
   331                         !@preg_match('~^application/(?:atom|rss|rdf)\+xml~', $attributes['type'])) {
       
   332                     continue;
       
   333                 }
       
   334                 if (!isset($attributes['href'])) {
       
   335                     continue;
       
   336                 }
       
   337                 try {
       
   338                     // checks if we need to canonize the given uri
       
   339                     try {
       
   340                         $uri = Zend_Uri::factory((string) $attributes['href']);
       
   341                     } catch (Zend_Uri_Exception $e) {
       
   342                         // canonize the uri
       
   343                         $path = (string) $attributes['href'];
       
   344                         $query = $fragment = '';
       
   345                         if (substr($path, 0, 1) != '/') {
       
   346                             // add the current root path to this one
       
   347                             $path = rtrim($client->getUri()->getPath(), '/') . '/' . $path;
       
   348                         }
       
   349                         if (strpos($path, '?') !== false) {
       
   350                             list($path, $query) = explode('?', $path, 2);
       
   351                         }
       
   352                         if (strpos($query, '#') !== false) {
       
   353                             list($query, $fragment) = explode('#', $query, 2);
       
   354                         }
       
   355                         $uri = Zend_Uri::factory($client->getUri(true));
       
   356                         $uri->setPath($path);
       
   357                         $uri->setQuery($query);
       
   358                         $uri->setFragment($fragment);
       
   359                     }
       
   360 
       
   361                     $feed = self::import($uri);
       
   362                 } catch (Exception $e) {
       
   363                     continue;
       
   364                 }
       
   365                 $feeds[$uri->getUri()] = $feed;
       
   366             }
       
   367         }
       
   368 
       
   369         // Return the fetched feeds
       
   370         return $feeds;
       
   371     }
       
   372 
       
   373     /**
       
   374      * Construct a new Zend_Feed_Abstract object from a custom array
       
   375      *
       
   376      * @param  array  $data
       
   377      * @param  string $format (rss|atom) the requested output format
       
   378      * @return Zend_Feed_Abstract
       
   379      */
       
   380     public static function importArray(array $data, $format = 'atom')
       
   381     {
       
   382         $obj = 'Zend_Feed_' . ucfirst(strtolower($format));
       
   383         if (!class_exists($obj)) {
       
   384             require_once 'Zend/Loader.php';
       
   385             Zend_Loader::loadClass($obj);
       
   386         }
       
   387 
       
   388         /**
       
   389          * @see Zend_Feed_Builder
       
   390          */
       
   391         require_once 'Zend/Feed/Builder.php';
       
   392         return new $obj(null, null, new Zend_Feed_Builder($data));
       
   393     }
       
   394 
       
   395     /**
       
   396      * Construct a new Zend_Feed_Abstract object from a Zend_Feed_Builder_Interface data source
       
   397      *
       
   398      * @param  Zend_Feed_Builder_Interface $builder this object will be used to extract the data of the feed
       
   399      * @param  string                      $format (rss|atom) the requested output format
       
   400      * @return Zend_Feed_Abstract
       
   401      */
       
   402     public static function importBuilder(Zend_Feed_Builder_Interface $builder, $format = 'atom')
       
   403     {
       
   404         $obj = 'Zend_Feed_' . ucfirst(strtolower($format));
       
   405         if (!class_exists($obj)) {
       
   406             require_once 'Zend/Loader.php';
       
   407             Zend_Loader::loadClass($obj);
       
   408         }
       
   409         return new $obj(null, null, $builder);
       
   410     }
       
   411 }