|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Dom |
|
17 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
18 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
19 * @version $Id: Query.php 23062 2010-10-08 14:05:45Z matthew $ |
|
20 */ |
|
21 |
|
22 /** |
|
23 * @see Zend_Dom_Query_Css2Xpath |
|
24 */ |
|
25 require_once 'Zend/Dom/Query/Css2Xpath.php'; |
|
26 |
|
27 /** |
|
28 * @see Zend_Dom_Query_Result |
|
29 */ |
|
30 require_once 'Zend/Dom/Query/Result.php'; |
|
31 |
|
32 /** |
|
33 * Query DOM structures based on CSS selectors and/or XPath |
|
34 * |
|
35 * @package Zend_Dom |
|
36 * @subpackage Query |
|
37 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
38 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
39 */ |
|
class Zend_Dom_Query
{
    /**#@+
     * Document types
     */
    const DOC_XML   = 'docXml';
    const DOC_HTML  = 'docHtml';
    const DOC_XHTML = 'docXhtml';
    /**#@-*/

    /**
     * Raw markup of the registered document; null until one has been set
     * @var null|string
     */
    protected $_document;

    /**
     * libxml errors collected by queryXpath() while parsing, if any
     * @var false|array
     */
    protected $_documentErrors = false;

    /**
     * Document type (one of the DOC_* constants); null until a document is set
     * @var null|string
     */
    protected $_docType;

    /**
     * Document encoding
     * @var null|string
     */
    protected $_encoding;

    /**
     * XPath namespaces, as prefix => namespace URI pairs
     * @var array
     */
    protected $_xpathNamespaces = array();

    /**
     * Constructor
     *
     * Stores the encoding, then registers the document (if one was given).
     *
     * @param  null|string $document Markup to query
     * @param  null|string $encoding Document encoding
     * @return void
     */
    public function __construct($document = null, $encoding = null)
    {
        $this->setEncoding($encoding);
        $this->setDocument($document);
    }

    /**
     * Set document encoding
     *
     * Null is stored as null; any other value is cast to string.
     *
     * @param  null|string $encoding
     * @return Zend_Dom_Query
     */
    public function setEncoding($encoding)
    {
        $this->_encoding = (null === $encoding) ? null : (string) $encoding;
        return $this;
    }

    /**
     * Get document encoding
     *
     * @return null|string
     */
    public function getEncoding()
    {
        return $this->_encoding;
    }

    /**
     * Set document to query
     *
     * Detects the document type from its content and delegates to the
     * matching setDocument*() method: a leading XML declaration selects XML,
     * the substring 'DTD XHTML' selects XHTML, anything else is HTML.
     * An empty (or null) document is ignored and leaves the current
     * document untouched.
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding
     * @return Zend_Dom_Query
     */
    public function setDocument($document, $encoding = null)
    {
        // Compare the string cast rather than calling strlen(): the constructor
        // passes null by default, and strlen(null) is deprecated as of PHP 8.1.
        if ('' === (string) $document) {
            return $this;
        }
        // breaking XML declaration to make syntax highlighting work
        if ('<' . '?xml' == substr(trim($document), 0, 5)) {
            return $this->setDocumentXml($document, $encoding);
        }
        if (strstr($document, 'DTD XHTML')) {
            return $this->setDocumentXhtml($document, $encoding);
        }
        return $this->setDocumentHtml($document, $encoding);
    }

    /**
     * Register HTML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentHtml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_HTML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Register XHTML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentXhtml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_XHTML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Register XML document
     *
     * @param  string $document
     * @param  null|string $encoding Document encoding; null keeps the current encoding
     * @return Zend_Dom_Query
     */
    public function setDocumentXml($document, $encoding = null)
    {
        $this->_document = (string) $document;
        $this->_docType  = self::DOC_XML;
        if (null !== $encoding) {
            $this->setEncoding($encoding);
        }
        return $this;
    }

    /**
     * Retrieve current document
     *
     * @return null|string Null when no document has been registered
     */
    public function getDocument()
    {
        return $this->_document;
    }

    /**
     * Get document type
     *
     * @return null|string One of the DOC_* constants, or null when no document is registered
     */
    public function getDocumentType()
    {
        return $this->_docType;
    }

    /**
     * Get any DOMDocument errors found
     *
     * @return false|array
     */
    public function getDocumentErrors()
    {
        return $this->_documentErrors;
    }

    /**
     * Perform a CSS selector query
     *
     * The selector is translated to XPath and handed to queryXpath().
     *
     * @param  string $query
     * @return Zend_Dom_Query_Result
     * @throws Zend_Dom_Exception See queryXpath()
     */
    public function query($query)
    {
        $xpathQuery = Zend_Dom_Query_Css2Xpath::transform($query);
        return $this->queryXpath($xpathQuery, $query);
    }

    /**
     * Perform an XPath query
     *
     * Parses the registered document into a fresh DOMDocument on every call
     * (XML documents with loadXML(), everything else with loadHTML()) and runs
     * the XPath expression against it.
     *
     * libxml errors reported during parsing are stored and exposed through
     * getDocumentErrors(); when a parse reports none, a previously stored
     * value is left in place.
     *
     * @param  string|array $xpathQuery
     * @param  string $query CSS selector query
     * @return Zend_Dom_Query_Result
     * @throws Zend_Dom_Exception If no document is registered or it cannot be parsed
     */
    public function queryXpath($xpathQuery, $query = null)
    {
        if (null === ($document = $this->getDocument())) {
            require_once 'Zend/Dom/Exception.php';
            throw new Zend_Dom_Exception('Cannot query; no document registered');
        }

        $encoding = $this->getEncoding();

        // Collect parse errors instead of emitting warnings. Remember the
        // caller's setting so it can be restored afterwards rather than being
        // forced off.
        $previousErrorMode = libxml_use_internal_errors(true);

        if (null === $encoding) {
            $domDoc = new DOMDocument('1.0');
        } else {
            $domDoc = new DOMDocument('1.0', $encoding);
        }

        // NOTE(review): DOCTYPE / entity declarations in the document are not
        // rejected here; confirm callers only pass trusted markup or screen it first.
        $type = $this->getDocumentType();
        if (self::DOC_XML === $type) {
            $success = $domDoc->loadXML($document);
        } else {
            // DOC_HTML, DOC_XHTML and any unknown type go through the HTML parser
            $success = $domDoc->loadHTML($document);
        }

        $errors = libxml_get_errors();
        if (!empty($errors)) {
            $this->_documentErrors = $errors;
            libxml_clear_errors();
        }
        libxml_use_internal_errors($previousErrorMode);

        if (!$success) {
            require_once 'Zend/Dom/Exception.php';
            throw new Zend_Dom_Exception(sprintf('Error parsing document (type == %s)', $type));
        }

        $nodeList = $this->_getNodeList($domDoc, $xpathQuery);
        return new Zend_Dom_Query_Result($query, $xpathQuery, $domDoc, $nodeList);
    }

    /**
     * Register XPath namespaces
     *
     * Replaces any previously registered set.
     *
     * @param  array $xpathNamespaces Prefix => namespace URI pairs
     * @return void
     */
    public function registerXpathNamespaces($xpathNamespaces)
    {
        $this->_xpathNamespaces = $xpathNamespaces;
    }

    /**
     * Prepare node list
     *
     * Registers the configured namespaces, then evaluates the XPath query.
     *
     * When the query contains a "[contains(@attr, ' " test, the value of every
     * such attribute in the document is first wrapped in single spaces, so a
     * space-delimited token also matches at the very start or end of the
     * value. This modifies the passed-in DOMDocument.
     *
     * @param  DOMDocument $document
     * @param  string|array $xpathQuery Cast to string before use
     * @return DOMNodeList|false Whatever DOMXPath::query() returns for the expression
     */
    protected function _getNodeList($document, $xpathQuery)
    {
        $xpath = new DOMXPath($document);
        foreach ($this->_xpathNamespaces as $prefix => $namespaceUri) {
            $xpath->registerNamespace($prefix, $namespaceUri);
        }

        $xpathQuery = (string) $xpathQuery;
        if (preg_match_all('|\[contains\((@[a-z0-9_-]+),\s?\' |i', $xpathQuery, $matches)) {
            foreach ($matches[1] as $attribute) {
                // $attribute still carries the leading "@"; strip it for the DOM lookup
                $attributeName = substr($attribute, 1);
                $nodes         = $xpath->query('//*[' . $attribute . ']');
                foreach ($nodes as $node) {
                    $attr        = $node->attributes->getNamedItem($attributeName);
                    $attr->value = ' ' . $attr->value . ' ';
                }
            }
        }

        return $xpath->query($xpathQuery);
    }
}