web/lib/Zend/Dom/Query.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Dom
       
    17  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    18  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    19  * @version    $Id: Query.php 23062 2010-10-08 14:05:45Z matthew $
       
    20  */
       
    21 
       
    22 /**
       
    23  * @see Zend_Dom_Query_Css2Xpath
       
    24  */
       
    25 require_once 'Zend/Dom/Query/Css2Xpath.php';
       
    26 
       
    27 /**
       
    28  * @see Zend_Dom_Query_Result
       
    29  */
       
    30 require_once 'Zend/Dom/Query/Result.php';
       
    31 
       
    32 /**
       
    33  * Query DOM structures based on CSS selectors and/or XPath
       
    34  *
       
    35  * @package    Zend_Dom
       
    36  * @subpackage Query
       
    37  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    38  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    39  */
       
    40 class Zend_Dom_Query
       
    41 {
       
    42     /**#@+
       
    43      * Document types
       
    44      */
       
    45     const DOC_XML   = 'docXml';
       
    46     const DOC_HTML  = 'docHtml';
       
    47     const DOC_XHTML = 'docXhtml';
       
    48     /**#@-*/
       
    49 
       
    50     /**
       
    51      * @var string
       
    52      */
       
    53     protected $_document;
       
    54 
       
    55     /**
       
    56      * DOMDocument errors, if any
       
    57      * @var false|array
       
    58      */
       
    59     protected $_documentErrors = false;
       
    60 
       
    61     /**
       
    62      * Document type
       
    63      * @var string
       
    64      */
       
    65     protected $_docType;
       
    66 
       
    67     /**
       
    68      * Document encoding
       
    69      * @var null|string
       
    70      */
       
    71     protected $_encoding;
       
    72 
       
    73     /**
       
    74      * XPath namespaces
       
    75      * @var array
       
    76      */
       
    77     protected $_xpathNamespaces = array();
       
    78 
       
    79     /**
       
    80      * Constructor
       
    81      *
       
    82      * @param  null|string $document
       
    83      * @return void
       
    84      */
       
    85     public function __construct($document = null, $encoding = null)
       
    86     {
       
    87         $this->setEncoding($encoding);
       
    88         $this->setDocument($document);
       
    89     }
       
    90 
       
    91     /**
       
    92      * Set document encoding
       
    93      * 
       
    94      * @param  string $encoding 
       
    95      * @return Zend_Dom_Query
       
    96      */
       
    97     public function setEncoding($encoding)
       
    98     {
       
    99         $this->_encoding = (null === $encoding) ? null : (string) $encoding;
       
   100         return $this;
       
   101     }
       
   102 
       
   103     /**
       
   104      * Get document encoding
       
   105      * 
       
   106      * @return null|string
       
   107      */
       
   108     public function getEncoding()
       
   109     {
       
   110         return $this->_encoding;
       
   111     }
       
   112 
       
   113     /**
       
   114      * Set document to query
       
   115      *
       
   116      * @param  string $document
       
   117      * @param  null|string $encoding Document encoding
       
   118      * @return Zend_Dom_Query
       
   119      */
       
   120     public function setDocument($document, $encoding = null)
       
   121     {
       
   122         if (0 === strlen($document)) {
       
   123             return $this;
       
   124         }
       
   125         // breaking XML declaration to make syntax highlighting work
       
   126         if ('<' . '?xml' == substr(trim($document), 0, 5)) {
       
   127             return $this->setDocumentXml($document, $encoding);
       
   128         }
       
   129         if (strstr($document, 'DTD XHTML')) {
       
   130             return $this->setDocumentXhtml($document, $encoding);
       
   131         }
       
   132         return $this->setDocumentHtml($document, $encoding);
       
   133     }
       
   134 
       
   135     /**
       
   136      * Register HTML document
       
   137      *
       
   138      * @param  string $document
       
   139      * @param  null|string $encoding Document encoding
       
   140      * @return Zend_Dom_Query
       
   141      */
       
   142     public function setDocumentHtml($document, $encoding = null)
       
   143     {
       
   144         $this->_document = (string) $document;
       
   145         $this->_docType  = self::DOC_HTML;
       
   146         if (null !== $encoding) {
       
   147             $this->setEncoding($encoding);
       
   148         }
       
   149         return $this;
       
   150     }
       
   151 
       
   152     /**
       
   153      * Register XHTML document
       
   154      *
       
   155      * @param  string $document
       
   156      * @param  null|string $encoding Document encoding
       
   157      * @return Zend_Dom_Query
       
   158      */
       
   159     public function setDocumentXhtml($document, $encoding = null)
       
   160     {
       
   161         $this->_document = (string) $document;
       
   162         $this->_docType  = self::DOC_XHTML;
       
   163         if (null !== $encoding) {
       
   164             $this->setEncoding($encoding);
       
   165         }
       
   166         return $this;
       
   167     }
       
   168 
       
   169     /**
       
   170      * Register XML document
       
   171      *
       
   172      * @param  string $document
       
   173      * @param  null|string $encoding Document encoding
       
   174      * @return Zend_Dom_Query
       
   175      */
       
   176     public function setDocumentXml($document, $encoding = null)
       
   177     {
       
   178         $this->_document = (string) $document;
       
   179         $this->_docType  = self::DOC_XML;
       
   180         if (null !== $encoding) {
       
   181             $this->setEncoding($encoding);
       
   182         }
       
   183         return $this;
       
   184     }
       
   185 
       
   186     /**
       
   187      * Retrieve current document
       
   188      *
       
   189      * @return string
       
   190      */
       
   191     public function getDocument()
       
   192     {
       
   193         return $this->_document;
       
   194     }
       
   195 
       
   196     /**
       
   197      * Get document type
       
   198      *
       
   199      * @return string
       
   200      */
       
   201     public function getDocumentType()
       
   202     {
       
   203         return $this->_docType;
       
   204     }
       
   205 
       
   206     /**
       
   207      * Get any DOMDocument errors found
       
   208      * 
       
   209      * @return false|array
       
   210      */
       
   211     public function getDocumentErrors()
       
   212     {
       
   213         return $this->_documentErrors;
       
   214     }
       
   215 
       
   216     /**
       
   217      * Perform a CSS selector query
       
   218      *
       
   219      * @param  string $query
       
   220      * @return Zend_Dom_Query_Result
       
   221      */
       
   222     public function query($query)
       
   223     {
       
   224         $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
       
   225         return $this->queryXpath($xpathQuery, $query);
       
   226     }
       
   227 
       
   228     /**
       
   229      * Perform an XPath query
       
   230      *
       
   231      * @param  string|array $xpathQuery
       
   232      * @param  string $query CSS selector query
       
   233      * @return Zend_Dom_Query_Result
       
   234      */
       
   235     public function queryXpath($xpathQuery, $query = null)
       
   236     {
       
   237         if (null === ($document = $this->getDocument())) {
       
   238             require_once 'Zend/Dom/Exception.php';
       
   239             throw new Zend_Dom_Exception('Cannot query; no document registered');
       
   240         }
       
   241 
       
   242         $encoding = $this->getEncoding();
       
   243         libxml_use_internal_errors(true);
       
   244         if (null === $encoding) {
       
   245             $domDoc = new DOMDocument('1.0');
       
   246         } else {
       
   247             $domDoc = new DOMDocument('1.0', $encoding);
       
   248         }
       
   249         $type   = $this->getDocumentType();
       
   250         switch ($type) {
       
   251             case self::DOC_XML:
       
   252                 $success = $domDoc->loadXML($document);
       
   253                 break;
       
   254             case self::DOC_HTML:
       
   255             case self::DOC_XHTML:
       
   256             default:
       
   257                 $success = $domDoc->loadHTML($document);
       
   258                 break;
       
   259         }
       
   260         $errors = libxml_get_errors();
       
   261         if (!empty($errors)) {
       
   262             $this->_documentErrors = $errors;
       
   263             libxml_clear_errors();
       
   264         }
       
   265         libxml_use_internal_errors(false);
       
   266 
       
   267         if (!$success) {
       
   268             require_once 'Zend/Dom/Exception.php';
       
   269             throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
       
   270         }
       
   271 
       
   272         $nodeList   = $this->_getNodeList($domDoc, $xpathQuery);
       
   273         return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
       
   274     }
       
   275 
       
   276     /**
       
   277      * Register XPath namespaces
       
   278      *
       
   279      * @param   array $xpathNamespaces
       
   280      * @return  void
       
   281      */
       
   282     public function registerXpathNamespaces($xpathNamespaces)
       
   283     {
       
   284         $this->_xpathNamespaces = $xpathNamespaces;
       
   285     }
       
   286 
       
   287     /**
       
   288      * Prepare node list
       
   289      *
       
   290      * @param  DOMDocument $document
       
   291      * @param  string|array $xpathQuery
       
   292      * @return array
       
   293      */
       
   294     protected function _getNodeList($document, $xpathQuery)
       
   295     {
       
   296         $xpath      = new DOMXPath($document);
       
   297         foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
       
   298             $xpath->registerNamespace($prefix, $namespaceUri);
       
   299         }
       
   300         $xpathQuery = (string) $xpathQuery;
       
   301         if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
       
   302             foreach ($matches[1] as $attribute) {
       
   303                 $queryString = '//*[' . $attribute . ']';
       
   304                 $attributeName = substr($attribute, 1);
       
   305                 $nodes = $xpath->query($queryString);
       
   306                 foreach ($nodes as $node) {
       
   307                     $attr = $node->attributes->getNamedItem($attributeName);
       
   308                     $attr->value = ' ' . $attr->value . ' ';
       
   309                 }
       
   310             }
       
   311         }
       
   312         return $xpath->query($xpathQuery);
       
   313     }
       
   314 }