vendor/symfony/src/Symfony/Component/CssSelector/CssSelector.php
changeset 0 7f95f8617b0b
equal deleted inserted replaced
-1:000000000000 0:7f95f8617b0b
       
     1 <?php
       
     2 
       
     3 /*
       
     4  * This file is part of the Symfony package.
       
     5  *
       
     6  * (c) Fabien Potencier <fabien@symfony.com>
       
     7  *
       
     8  * For the full copyright and license information, please view the LICENSE
       
     9  * file that was distributed with this source code.
       
    10  */
       
    11 
       
    12 namespace Symfony\Component\CssSelector;
       
    13 
       
    14 use Symfony\Component\CssSelector\Exception\ParseException;
       
    15 
       
    16 /**
       
    17  * CssSelector is the main entry point of the component and can convert CSS
       
    18  * selectors to XPath expressions.
       
    19  *
       
    20  * $xpath = CssSelector::toXpath('h1.foo');
       
    21  *
       
    22  * This component is a port of the Python lxml library,
       
    23  * which is copyright Infrae and distributed under the BSD license.
       
    24  *
       
    25  * @author Fabien Potencier <fabien@symfony.com>
       
    26  *
       
    27  * @api
       
    28  */
       
    29 class CssSelector
       
    30 {
       
    /**
     * Converts a CSS expression into the equivalent XPath expression.
     *
     * String selectors of the simple forms "tag", "tag#id" / "#id" and
     * "tag.class" / ".class" are translated directly by pattern matching.
     * Any other string is parsed into a node first. A value that is not a
     * string is used as given and asked for its XPath form via toXpath().
     *
     * @param  mixed  $cssExpr The CSS expression.
     * @param  string $prefix  An optional prefix for the XPath expression.
     *
     * @return string
     *
     * @throws ParseException When no XPath expression is obtained for the expression
     *
     * @api
     */
    static public function toXPath($cssExpr, $prefix = 'descendant-or-self::')
    {
        if (is_string($cssExpr)) {
            // Shortcut: a bare element name, possibly followed by whitespace.
            if (preg_match('#^\w+\s*$#u', $cssExpr, $parts)) {
                return $prefix.trim($parts[0]);
            }

            // Shortcuts: an optional element name followed by a single id or
            // class. Tried in this order; a missing element name becomes "*".
            $shortcuts = array(
                '~^(\w*)#(\w+)\s*$~u'  => "%s%s[@id = '%s']",
                '#^(\w*)\.(\w+)\s*$#u' => "%s%s[contains(concat(' ', normalize-space(@class), ' '), ' %s ')]",
            );
            foreach ($shortcuts as $pattern => $format) {
                if (preg_match($pattern, $cssExpr, $parts)) {
                    return sprintf($format, $prefix, $parts[1] ?: '*', $parts[2]);
                }
            }

            // Everything else goes through the full parser.
            $instance = new self();
            $cssExpr = $instance->parse($cssExpr);
        }

        $xpath = $cssExpr->toXpath();

        // @codeCoverageIgnoreStart
        if (!$xpath) {
            throw new ParseException(sprintf('Got None for xpath expression from %s.', $cssExpr));
        }
        // @codeCoverageIgnoreEnd

        if ($prefix) {
            $xpath->addPrefix($prefix);
        }

        return (string) $xpath;
    }
       
    78 
       
    /**
     * Tokenizes an expression and parses it into the Node object that
     * represents it.
     *
     * An exception raised while parsing the token stream is re-thrown as a
     * new exception of the same class. Its message is extended with the
     * tokens consumed so far and the next pending token, and the original
     * exception is attached as the previous one.
     *
     * @param  string $string The expression to parse
     *
     * @return Node\NodeInterface
     *
     * @throws \Exception When parsing the token stream fails
     */
    public function parse($string)
    {
        $tokenizer = new Tokenizer();

        $stream = new TokenStream($tokenizer->tokenize($string), $string);

        try {
            return $this->parseSelectorGroup($stream);
        } catch (\Exception $e) {
            $class = get_class($e);

            // Separator first: the legacy implode(array, separator) order was
            // deprecated in PHP 7.4 and removed in PHP 8.0, where it would make
            // this error path fail instead of reporting the parse error.
            $consumed = implode('', $stream->getUsed());

            throw new $class(sprintf('%s at %s -> %s', $e->getMessage(), $consumed, $stream->peek()), 0, $e);
        }
    }
       
   103 
       
   104     /**
       
   105      * Parses a selector group contained in $stream and returns
       
   106      * the Node object that represents the expression.
       
   107      *
       
   108      * @param  TokenStream $stream The stream to parse.
       
   109      *
       
   110      * @return Node\NodeInterface
       
   111      */
       
   112     private function parseSelectorGroup($stream)
       
   113     {
       
   114         $result = array();
       
   115         while (true) {
       
   116             $result[] = $this->parseSelector($stream);
       
   117             if ($stream->peek() == ',') {
       
   118                 $stream->next();
       
   119             } else {
       
   120                 break;
       
   121             }
       
   122         }
       
   123 
       
   124         if (count($result) == 1) {
       
   125             return $result[0];
       
   126         }
       
   127 
       
   128         return new Node\OrNode($result);
       
   129     }
       
   130 
       
   131     /**
       
   132      * Parses a selector contained in $stream and returns the Node
       
   133      * object that represents it.
       
   134      *
       
   135      * @throws ParseException When expected selector but got something else
       
   136      *
       
   137      * @param  TokenStream $stream The stream containing the selector.
       
   138      *
       
   139      * @return Node\NodeInterface
       
   140      */
       
   141     private function parseSelector($stream)
       
   142     {
       
   143         $result = $this->parseSimpleSelector($stream);
       
   144 
       
   145         while (true) {
       
   146             $peek = $stream->peek();
       
   147             if (',' == $peek || null === $peek) {
       
   148                 return $result;
       
   149             } elseif (in_array($peek, array('+', '>', '~'))) {
       
   150                 // A combinator
       
   151                 $combinator = (string) $stream->next();
       
   152             } else {
       
   153                 $combinator = ' ';
       
   154             }
       
   155             $consumed = count($stream->getUsed());
       
   156             $nextSelector = $this->parseSimpleSelector($stream);
       
   157             if ($consumed == count($stream->getUsed())) {
       
   158                 throw new ParseException(sprintf("Expected selector, got '%s'", $stream->peek()));
       
   159             }
       
   160 
       
   161             $result = new Node\CombinedSelectorNode($result, $combinator, $nextSelector);
       
   162         }
       
   163 
       
   164         return $result;
       
   165     }
       
   166 
       
   167     /**
       
   168      * Parses a simple selector (the current token) from $stream and returns
       
   169      * the resulting Node object.
       
   170      *
       
   171      * @throws ParseException When expected symbol but got something else
       
   172      *
       
   173      * @param  TokenStream $stream The stream containing the selector.
       
   174      *
       
   175      * @return Node\NodeInterface
       
   176      */
       
   177     private function parseSimpleSelector($stream)
       
   178     {
       
   179         $peek = $stream->peek();
       
   180         if ('*' != $peek && !$peek->isType('Symbol')) {
       
   181             $element = $namespace = '*';
       
   182         } else {
       
   183             $next = $stream->next();
       
   184             if ('*' != $next && !$next->isType('Symbol')) {
       
   185                 throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
       
   186             }
       
   187 
       
   188             if ($stream->peek() == '|') {
       
   189                 $namespace = $next;
       
   190                 $stream->next();
       
   191                 $element = $stream->next();
       
   192                 if ('*' != $element && !$next->isType('Symbol')) {
       
   193                     throw new ParseException(sprintf("Expected symbol, got '%s'", $next));
       
   194                 }
       
   195             } else {
       
   196                 $namespace = '*';
       
   197                 $element = $next;
       
   198             }
       
   199         }
       
   200 
       
   201         $result = new Node\ElementNode($namespace, $element);
       
   202         $hasHash = false;
       
   203         while (true) {
       
   204             $peek = $stream->peek();
       
   205             if ('#' == $peek) {
       
   206                 if ($hasHash) {
       
   207                     /* You can't have two hashes
       
   208                         (FIXME: is there some more general rule I'm missing?) */
       
   209                     // @codeCoverageIgnoreStart
       
   210                     break;
       
   211                     // @codeCoverageIgnoreEnd
       
   212                 }
       
   213                 $stream->next();
       
   214                 $result = new Node\HashNode($result, $stream->next());
       
   215                 $hasHash = true;
       
   216 
       
   217                 continue;
       
   218             } elseif ('.' == $peek) {
       
   219                 $stream->next();
       
   220                 $result = new Node\ClassNode($result, $stream->next());
       
   221 
       
   222                 continue;
       
   223             } elseif ('[' == $peek) {
       
   224                 $stream->next();
       
   225                 $result = $this->parseAttrib($result, $stream);
       
   226                 $next = $stream->next();
       
   227                 if (']' != $next) {
       
   228                     throw new ParseException(sprintf("] expected, got '%s'", $next));
       
   229                 }
       
   230 
       
   231                 continue;
       
   232             } elseif (':' == $peek || '::' == $peek) {
       
   233                 $type = $stream->next();
       
   234                 $ident = $stream->next();
       
   235                 if (!$ident || !$ident->isType('Symbol')) {
       
   236                     throw new ParseException(sprintf("Expected symbol, got '%s'", $ident));
       
   237                 }
       
   238 
       
   239                 if ($stream->peek() == '(') {
       
   240                     $stream->next();
       
   241                     $peek = $stream->peek();
       
   242                     if ($peek->isType('String')) {
       
   243                         $selector = $stream->next();
       
   244                     } elseif ($peek->isType('Symbol') && is_int($peek)) {
       
   245                         $selector = intval($stream->next());
       
   246                     } else {
       
   247                         // FIXME: parseSimpleSelector, or selector, or...?
       
   248                         $selector = $this->parseSimpleSelector($stream);
       
   249                     }
       
   250                     $next = $stream->next();
       
   251                     if (')' != $next) {
       
   252                         throw new ParseException(sprintf("Expected ')', got '%s' and '%s'", $next, $selector));
       
   253                     }
       
   254 
       
   255                     $result = new Node\FunctionNode($result, $type, $ident, $selector);
       
   256                 } else {
       
   257                     $result = new Node\PseudoNode($result, $type, $ident);
       
   258                 }
       
   259 
       
   260                 continue;
       
   261             } else {
       
   262                 if (' ' == $peek) {
       
   263                     $stream->next();
       
   264                 }
       
   265 
       
   266                 break;
       
   267             }
       
   268             // FIXME: not sure what "negation" is
       
   269         }
       
   270 
       
   271         return $result;
       
   272     }
       
   273 
       
   274     /**
       
   275      * Parses an attribute from a selector contained in $stream and returns
       
   276      * the resulting AttribNode object.
       
   277      *
       
   278      * @throws ParseException When encountered unexpected selector
       
   279      *
       
   280      * @param  Node\NodeInterface $selector The selector object whose attribute
       
   281      *                                      is to be parsed.
       
   282      * @param  TokenStream        $stream    The container token stream.
       
   283      *
       
   284      * @return Node\AttribNode
       
   285      */
       
   286     private function parseAttrib($selector, $stream)
       
   287     {
       
   288         $attrib = $stream->next();
       
   289         if ($stream->peek() == '|') {
       
   290             $namespace = $attrib;
       
   291             $stream->next();
       
   292             $attrib = $stream->next();
       
   293         } else {
       
   294             $namespace = '*';
       
   295         }
       
   296 
       
   297         if ($stream->peek() == ']') {
       
   298             return new Node\AttribNode($selector, $namespace, $attrib, 'exists', null);
       
   299         }
       
   300 
       
   301         $op = $stream->next();
       
   302         if (!in_array($op, array('^=', '$=', '*=', '=', '~=', '|=', '!='))) {
       
   303             throw new ParseException(sprintf("Operator expected, got '%s'", $op));
       
   304         }
       
   305 
       
   306         $value = $stream->next();
       
   307         if (!$value->isType('Symbol') && !$value->isType('String')) {
       
   308             throw new ParseException(sprintf("Expected string or symbol, got '%s'", $value));
       
   309         }
       
   310 
       
   311         return new Node\AttribNode($selector, $namespace, $attrib, $op, $value);
       
   312     }
       
   313 }