vendor/symfony/src/Symfony/Component/CssSelector/Tokenizer.php
author ymh <ymh.work@gmail.com>
Sat, 24 Sep 2011 15:40:41 +0200
changeset 0 7f95f8617b0b
permissions -rwxr-xr-x
first commit
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
/*
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
 * This file is part of the Symfony package.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
 *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
 * (c) Fabien Potencier <fabien@symfony.com>
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
 *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
 * For the full copyright and license information, please view the LICENSE
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
 * file that was distributed with this source code.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
 */
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
namespace Symfony\Component\CssSelector;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
use Symfony\Component\CssSelector\Exception\ParseException;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
/**
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
 * Tokenizer lexes a CSS Selector to tokens.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
 *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
 * This component is a port of the Python lxml library,
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
 * which is copyright Infrae and distributed under the BSD license.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
 *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
 * @author Fabien Potencier <fabien@symfony.com>
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
 */
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
class Tokenizer
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
{
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
    /**
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
     * Takes a CSS selector and returns an array holding the Tokens
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
     * it contains.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
     * @param  string $s The selector to lex.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
     * @return array Token[]
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
     */
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
    public function tokenize($s)
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
    {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
        if (function_exists('mb_internal_encoding') && ((int) ini_get('mbstring.func_overload')) & 2) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
            $mbEncoding = mb_internal_encoding();
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
            mb_internal_encoding('ASCII');
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
        }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
        $tokens = array();
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
        $pos = 0;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
        $s = preg_replace('#/\*.*?\*/#s', '', $s);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
        while (true) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
            if (preg_match('#\s+#A', $s, $match, 0, $pos)) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
                $precedingWhitespacePos = $pos;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
                $pos += strlen($match[0]);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
            } else {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
                $precedingWhitespacePos = 0;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
            if ($pos >= strlen($s)) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
                if (isset($mbEncoding)) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
                    mb_internal_encoding($mbEncoding);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
                }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
                return $tokens;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    59
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    60
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
            if (preg_match('#[+-]?\d*n(?:[+-]\d+)?#A', $s, $match, 0, $pos) && 'n' !== $match[0]) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
                $sym = substr($s, $pos, strlen($match[0]));
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
                $tokens[] = new Token('Symbol', $sym, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    64
                $pos += strlen($match[0]);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
                continue;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
            $c = $s[$pos];
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
            $c2 = substr($s, $pos, 2);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    71
            if (in_array($c2, array('~=', '|=', '^=', '$=', '*=', '::', '!='))) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    72
                $tokens[] = new Token('Token', $c2, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
                $pos += 2;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
                continue;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
            if (in_array($c, array('>', '+', '~', ',', '.', '*', '=', '[', ']', '(', ')', '|', ':', '#'))) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
                if (in_array($c, array('.', '#', '[')) && $precedingWhitespacePos > 0) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
                    $tokens[] = new Token('Token', ' ', $precedingWhitespacePos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
                }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
                $tokens[] = new Token('Token', $c, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
                ++$pos;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    84
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
                continue;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
            if ('"' === $c || "'" === $c) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    89
                // Quoted string
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    90
                $oldPos = $pos;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    91
                list($sym, $pos) = $this->tokenizeEscapedString($s, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    92
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    93
                $tokens[] = new Token('String', $sym, $oldPos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    94
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    95
                continue;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    96
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    97
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    98
            $oldPos = $pos;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    99
            list($sym, $pos) = $this->tokenizeSymbol($s, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   100
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   101
            $tokens[] = new Token('Symbol', $sym, $oldPos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   102
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   103
            continue;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   104
        }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   105
    }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   106
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   107
    /**
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   108
     * Tokenizes a quoted string (i.e. 'A string quoted with \' characters'),
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   109
     * and returns an array holding the unquoted string contained by $s and
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   110
     * the new position from which tokenizing should take over.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   111
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   112
     * @throws ParseException When expected closing is not found
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   113
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
     * @param  string  $s   The selector string containing the quoted string.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
     * @param  integer $pos The starting position for the quoted string.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   116
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   117
     * @return array
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   118
     */
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   119
    private function tokenizeEscapedString($s, $pos)
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   120
    {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   121
        $quote = $s[$pos];
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   122
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   123
        $pos = $pos + 1;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   124
        $start = $pos;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   125
        while (true) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
            $next = strpos($s, $quote, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
            if (false === $next) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
                throw new ParseException(sprintf('Expected closing %s for string in: %s', $quote, substr($s, $start)));
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   130
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   131
            $result = substr($s, $start, $next - $start);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   132
            if ('\\' === $result[strlen($result) - 1]) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   133
                // next quote character is escaped
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   134
                $pos = $next + 1;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   135
                continue;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   136
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   137
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   138
            if (false !== strpos($result, '\\')) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   139
                $result = $this->unescapeStringLiteral($result);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   140
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   141
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   142
            return array($result, $next + 1);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   143
        }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   144
    }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   145
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   146
    /**
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   147
     * Unescapes a string literal and returns the unescaped string.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   148
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   149
     * @throws ParseException When invalid escape sequence is found
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   150
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   151
     * @param  string $literal The string literal to unescape.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   152
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   153
     * @return string
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   154
     */
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   155
    private function unescapeStringLiteral($literal)
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   156
    {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   157
        return preg_replace_callback('#(\\\\(?:[A-Fa-f0-9]{1,6}(?:\r\n|\s)?|[^A-Fa-f0-9]))#', function ($matches) use ($literal)
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
        {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
            if ($matches[0][0] == '\\' && strlen($matches[0]) > 1) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
                $matches[0] = substr($matches[0], 1);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   161
                if (in_array($matches[0][0], array('0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'A', 'B', 'C', 'D', 'E', 'F', 'a', 'b', 'c', 'd', 'e', 'f'))) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
                    return chr(trim($matches[0]));
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
                }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   164
            } else {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
                throw new ParseException(sprintf('Invalid escape sequence %s in string %s', $matches[0], $literal));
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   166
            }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   167
        }, $literal);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   168
    }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   169
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   170
    /**
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   171
     * Lexes selector $s and returns an array holding the name of the symbol
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   172
     * contained in it and the new position from which tokenizing should take
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   173
     * over.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   174
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   175
     * @throws ParseException When Unexpected symbol is found
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   176
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   177
     * @param  string  $s   The selector string.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   178
     * @param  integer $pos The position in $s at which the symbol starts.
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   179
     *
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   180
     * @return array
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   181
     */
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
    private function tokenizeSymbol($s, $pos)
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   183
    {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   184
        $start = $pos;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   185
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   186
        if (!preg_match('#[^\w\-]#', $s, $match, PREG_OFFSET_CAPTURE, $pos)) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   187
            // Goes to end of s
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   188
            return array(substr($s, $start), strlen($s));
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   189
        }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   190
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
        $matchStart = $match[0][1];
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   192
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   193
        if ($matchStart == $pos) {
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   194
            throw new ParseException(sprintf('Unexpected symbol: %s at %s', $s[$pos], $pos));
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   195
        }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   196
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   197
        $result = substr($s, $start, $matchStart - $start);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
        $pos = $matchStart;
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   199
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   200
        return array($result, $pos);
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
    }
7f95f8617b0b first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
}