|
1 <?php |
|
2 |
|
3 /* |
|
4 * This file is part of the Symfony package. |
|
5 * |
|
6 * (c) Fabien Potencier <fabien@symfony.com> |
|
7 * |
|
8 * For the full copyright and license information, please view the LICENSE |
|
9 * file that was distributed with this source code. |
|
10 */ |
|
11 |
|
12 namespace Symfony\Component\CssSelector; |
|
13 |
|
14 use Symfony\Component\CssSelector\Exception\ParseException; |
|
15 |
|
/**
 * CssSelector is the main entry point of the component and can convert CSS
 * selectors to XPath expressions.
 *
 * $xpath = CssSelector::toXPath('h1.foo');
 *
 * This component is a port of the Python lxml library,
 * which is copyright Infrae and distributed under the BSD license.
 *
 * @author Fabien Potencier <fabien@symfony.com>
 *
 * @api
 */
class CssSelector
{
    /**
     * Translates a CSS expression to its XPath equivalent.
     *
     * Three very common shapes ("tag", "tag#id" / "#id", "tag.class" /
     * ".class") are translated directly with regular expressions; any other
     * string is tokenized and parsed. A value that is not a string is used
     * as-is and must expose a toXpath() method.
     *
     * @param mixed  $cssExpr The CSS expression (a string, or an already parsed node).
     * @param string $prefix  An optional prefix for the XPath expression.
     *
     * @return string
     *
     * @throws ParseException When got None for xpath expression
     *
     * @api
     */
    public static function toXPath($cssExpr, $prefix = 'descendant-or-self::')
    {
        if (is_string($cssExpr)) {
            // Fast path: a bare element name, e.g. "h1".
            if (preg_match('#^\w+\s*$#u', $cssExpr, $match)) {
                return $prefix.trim($match[0]);
            }

            // Fast path: "#id" or "element#id".
            if (preg_match('~^(\w*)#(\w+)\s*$~u', $cssExpr, $match)) {
                return sprintf("%s%s[@id = '%s']", $prefix, $match[1] ?: '*', $match[2]);
            }

            // Fast path: ".class" or "element.class".
            if (preg_match('#^(\w*)\.(\w+)\s*$#u', $cssExpr, $match)) {
                return sprintf("%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]", $prefix, $match[1] ?: '*', $match[2]);
            }

            // Anything else goes through the full parser.
            $parser = new self();
            $cssExpr = $parser->parse($cssExpr);
        }

        $expr = $cssExpr->toXpath();

        // @codeCoverageIgnoreStart
        if (!$expr) {
            throw new ParseException(sprintf('Got None for xpath expression from %s.', $cssExpr));
        }
        // @codeCoverageIgnoreEnd

        if ($prefix) {
            $expr->addPrefix($prefix);
        }

        return (string) $expr;
    }

    /**
     * Parses an expression and returns the Node object that represents
     * the parsed expression.
     *
     * Any exception raised while parsing is re-thrown as a new instance of
     * the same class whose message is extended with the tokens consumed so
     * far and the next pending token; the original exception is attached as
     * the previous one.
     *
     * @param string $string The expression to parse
     *
     * @return Node\NodeInterface
     *
     * @throws \Exception When tokenizer throws it while parsing
     */
    public function parse($string)
    {
        $tokenizer = new Tokenizer();
        $stream = new TokenStream($tokenizer->tokenize($string), $string);

        try {
            return $this->parseSelectorGroup($stream);
        } catch (\Exception $e) {
            $class = get_class($e);

            // implode() takes the separator first; the reversed legacy order
            // is no longer accepted by PHP 8.
            throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), implode('', $stream->getUsed()), $stream->peek()), 0, $e);
        }
    }

    /**
     * Parses a selector group contained in $stream and returns
     * the Node object that represents the expression.
     *
     * A group is one or more selectors separated by ",". A single selector
     * is returned directly; several are wrapped in an OrNode.
     *
     * @param TokenStream $stream The stream to parse.
     *
     * @return Node\NodeInterface
     */
    private function parseSelectorGroup($stream)
    {
        $result = array();
        while (true) {
            $result[] = $this->parseSelector($stream);
            if ($stream->peek() == ',') {
                $stream->next();
            } else {
                break;
            }
        }

        if (count($result) == 1) {
            return $result[0];
        }

        return new Node\OrNode($result);
    }

    /**
     * Parses a selector contained in $stream and returns the Node
     * object that represents it.
     *
     * Simple selectors are read one after another and chained with the
     * combinator found between them ("+", ">", "~", or " " when none is
     * given) until a "," or the end of the stream is reached.
     *
     * @param TokenStream $stream The stream containing the selector.
     *
     * @return Node\NodeInterface
     *
     * @throws ParseException When expected selector but got something else
     */
    private function parseSelector($stream)
    {
        $result = $this->parseSimpleSelector($stream);

        while (true) {
            $peek = $stream->peek();
            if (',' == $peek || null === $peek) {
                return $result;
            } elseif (in_array($peek, array('+', '>', '~'))) {
                // A combinator
                $combinator = (string) $stream->next();
            } else {
                $combinator = ' ';
            }

            // The next simple selector must consume at least one token,
            // otherwise this loop would never make progress.
            $consumed = count($stream->getUsed());
            $nextSelector = $this->parseSimpleSelector($stream);
            if ($consumed == count($stream->getUsed())) {
                throw new ParseException(sprintf("Expected selector, got '%s'", $stream->peek()));
            }

            $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
        }
    }

    /**
     * Parses a simple selector (the current token) from $stream and returns
     * the resulting Node object.
     *
     * Reads an optional "namespace|element" (or plain element / "*") and then
     * any number of "#id", ".class", "[attribute]" and ":pseudo" /
     * ":function(...)" suffixes. One trailing " " token is consumed, if present.
     *
     * @param TokenStream $stream The stream containing the selector.
     *
     * @return Node\NodeInterface
     *
     * @throws ParseException When expected symbol but got something else,
     *                        or when the stream is already exhausted
     */
    private function parseSimpleSelector($stream)
    {
        $peek = $stream->peek();
        if (null === $peek) {
            // End of stream: there is no token to inspect, so report a parse
            // error rather than calling isType() on null.
            throw new ParseException("Expected selector, got ''");
        }

        if ('*' != $peek && !$peek->isType('Symbol')) {
            // No explicit element name: match any element in any namespace.
            $element = $namespace = '*';
        } else {
            $next = $stream->next();
            if ('*' != $next && !$next->isType('Symbol')) {
                throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
            }

            if ($stream->peek() == '|') {
                // "namespace|element"
                $namespace = $next;
                $stream->next();
                $element = $stream->next();
                // Validate the element itself (not the namespace token read before it).
                if ('*' != $element && (null === $element || !$element->isType('Symbol'))) {
                    throw new ParseException(sprintf("Expected symbol, got '%s'", $element));
                }
            } else {
                $namespace = '*';
                $element = $next;
            }
        }

        $result = new Node\ElementNode($namespace, $element);
        $hasHash = false;
        while (true) {
            $peek = $stream->peek();
            if ('#' == $peek) {
                if ($hasHash) {
                    // You can't have two hashes
                    // (FIXME: is there some more general rule I'm missing?)
                    // @codeCoverageIgnoreStart
                    break;
                    // @codeCoverageIgnoreEnd
                }
                $stream->next();
                $result = new Node\HashNode($result, $stream->next());
                $hasHash = true;

                continue;
            } elseif ('.' == $peek) {
                $stream->next();
                $result = new Node\ClassNode($result, $stream->next());

                continue;
            } elseif ('[' == $peek) {
                $stream->next();
                $result = $this->parseAttrib($result, $stream);
                $next = $stream->next();
                if (']' != $next) {
                    throw new ParseException(sprintf("] expected, got '%s'", $next));
                }

                continue;
            } elseif (':' == $peek || '::' == $peek) {
                $type = $stream->next();
                $ident = $stream->next();
                if (!$ident || !$ident->isType('Symbol')) {
                    throw new ParseException(sprintf("Expected symbol, got '%s'", $ident));
                }

                if ($stream->peek() == '(') {
                    $stream->next();
                    $peek = $stream->peek();
                    if (null !== $peek && $peek->isType('String')) {
                        $selector = $stream->next();
                    } elseif (null !== $peek && $peek->isType('Symbol') && is_int($peek)) {
                        // NOTE(review): $peek is a token object here, so is_int()
                        // looks like it can never be true and numeric arguments
                        // fall through to the branch below - confirm before
                        // changing, since FunctionNode receives the result.
                        $selector = intval($stream->next());
                    } else {
                        // FIXME: parseSimpleSelector, or selector, or...?
                        // Also reached at end of stream, where the nested call
                        // raises a ParseException.
                        $selector = $this->parseSimpleSelector($stream);
                    }
                    $next = $stream->next();
                    if (')' != $next) {
                        throw new ParseException(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
                    }

                    $result = new Node\FunctionNode($result, $type, $ident, $selector);
                } else {
                    $result = new Node\PseudoNode($result, $type, $ident);
                }

                continue;
            } else {
                if (' ' == $peek) {
                    $stream->next();
                }

                break;
            }
            // FIXME: not sure what "negation" is
        }

        return $result;
    }

    /**
     * Parses an attribute from a selector contained in $stream and returns
     * the resulting AttribNode object.
     *
     * Expects the opening "[" to have been consumed already and leaves the
     * closing "]" in the stream for the caller to check.
     *
     * @param Node\NodeInterface $selector The selector object whose attribute
     *                                     is to be parsed.
     * @param TokenStream        $stream   The container token stream.
     *
     * @return Node\AttribNode
     *
     * @throws ParseException When encountered unexpected selector
     */
    private function parseAttrib($selector, $stream)
    {
        $attrib = $stream->next();
        if ($stream->peek() == '|') {
            // "namespace|attribute"
            $namespace = $attrib;
            $stream->next();
            $attrib = $stream->next();
        } else {
            $namespace = '*';
        }

        // "[attr]" with no operator: a plain existence test.
        if ($stream->peek() == ']') {
            return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
        }

        $op = $stream->next();
        if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!='))) {
            throw new ParseException(sprintf("Operator expected, got '%s'", $op));
        }

        $value = $stream->next();
        // A missing value (end of stream) is rejected the same way as a
        // token of the wrong type.
        if (null === $value || (!$value->isType('Symbol') && !$value->isType('String'))) {
            throw new ParseException(sprintf("Expected string or symbol, got '%s'", $value));
        }

        return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
    }
}