<?php

/*
 * This file is part of Twig.
 *
 * (c) 2009 Fabien Potencier
 * (c) 2009 Armin Ronacher
 *
 * For the full copyright and license information, please view the LICENSE
 * file that was distributed with this source code.
 */

/**
 * Lexes a template string.
 *
 * The lexer is a small state machine: in STATE_DATA it scans plain template
 * text for the next opening tag, in STATE_BLOCK / STATE_VAR it tokenizes the
 * expression found between the block / variable delimiters.
 *
 * @package twig
 * @author  Fabien Potencier <fabien@symfony.com>
 */
class Twig_Lexer implements Twig_LexerInterface
{
    /** @var array Tokens produced so far for the template being lexed */
    protected $tokens;
    /** @var string Template source with line endings normalized to "\n" */
    protected $code;
    /** @var int Current offset into $code */
    protected $cursor;
    /** @var int Current line number (1-based unless changed by a "line" tag) */
    protected $lineno;
    /** @var int Length of $code as returned by strlen() */
    protected $end;
    /** @var int One of the STATE_* constants */
    protected $state;
    /** @var array Stack of array(opening bracket, line number) not yet closed */
    protected $brackets;

    /** @var Twig_Environment Environment providing the operator tables */
    protected $env;
    /** @var string|null Identifier of the source, used in error messages */
    protected $filename;
    /** @var array Tag delimiters and whitespace trim marker */
    protected $options;
    /** @var string|null Cached operator regex, built on first use */
    protected $operatorRegex;

    const STATE_DATA  = 0;
    const STATE_BLOCK = 1;
    const STATE_VAR   = 2;

    const REGEX_NAME   = '/[A-Za-z_][A-Za-z0-9_]*/A';
    const REGEX_NUMBER = '/[0-9]+(?:\.[0-9]+)?/A';
    const REGEX_STRING = '/"([^"\\\\]*(?:\\\\.[^"\\\\]*)*)"|\'([^\'\\\\]*(?:\\\\.[^\'\\\\]*)*)\'/As';
    const PUNCTUATION  = '()[]{}?:.,|';

    /**
     * Constructor.
     *
     * @param Twig_Environment $env     The environment whose unary/binary operators are lexed
     * @param array            $options Overrides for 'tag_comment', 'tag_block', 'tag_variable'
     *                                  (each an array(open, close)) and 'whitespace_trim'
     */
    public function __construct(Twig_Environment $env, array $options = array())
    {
        $this->env = $env;

        $this->options = array_merge(array(
            'tag_comment'     => array('{#', '#}'),
            'tag_block'       => array('{%', '%}'),
            'tag_variable'    => array('{{', '}}'),
            'whitespace_trim' => '-',
        ), $options);
    }

    /**
     * Tokenizes a source code.
     *
     * @param string $code     The source code
     * @param string $filename A unique identifier for the source code
     *
     * @return Twig_TokenStream A token stream instance
     *
     * @throws Twig_Error_Syntax When the source cannot be lexed or a bracket is left open
     */
    public function tokenize($code, $filename = null)
    {
        // When mbstring overloads the string functions, switch to a single-byte
        // encoding so that strlen()/substr()/strpos() work on byte offsets.
        $mbEncoding = null;
        if (function_exists('mb_internal_encoding') && (((int) ini_get('mbstring.func_overload')) & 2)) {
            $mbEncoding = mb_internal_encoding();
            mb_internal_encoding('ASCII');
        }

        $this->code = str_replace(array("\r\n", "\r"), "\n", $code);
        $this->filename = $filename;
        $this->cursor = 0;
        $this->lineno = 1;
        $this->end = strlen($this->code);
        $this->tokens = array();
        $this->state = self::STATE_DATA;
        $this->brackets = array();

        try {
            while ($this->cursor < $this->end) {
                // dispatch to the lexing functions depending
                // on the current state
                switch ($this->state) {
                    case self::STATE_DATA:
                        $this->lexData();
                        break;

                    case self::STATE_BLOCK:
                        $this->lexBlock();
                        break;

                    case self::STATE_VAR:
                        $this->lexVar();
                        break;
                }
            }

            $this->pushToken(Twig_Token::EOF_TYPE);

            if (!empty($this->brackets)) {
                list($expect, $lineno) = array_pop($this->brackets);
                throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
            }
        } catch (Exception $e) {
            // The internal encoding is process-wide state: restore it even
            // when lexing fails, then let the original exception propagate.
            if (null !== $mbEncoding) {
                mb_internal_encoding($mbEncoding);
            }

            throw $e;
        }

        if (null !== $mbEncoding) {
            mb_internal_encoding($mbEncoding);
        }

        return new Twig_TokenStream($this->tokens, $this->filename);
    }

    /**
     * Lexes plain template data up to the next comment, variable or block
     * opening tag, emits it as a TEXT token and switches state accordingly.
     *
     * "raw" blocks and "line N" blocks are handled here directly and leave
     * the lexer in STATE_DATA.
     */
    protected function lexData()
    {
        $pos = $this->end;
        $append = '';
        $trimBlock = false;
        $token = null;
        $trimMarker = $this->options['whitespace_trim'];

        // Find the first token after the cursor
        foreach (array('tag_comment', 'tag_variable', 'tag_block') as $type) {
            $tmpPos = strpos($this->code, $this->options[$type][0], $this->cursor);
            if (false === $tmpPos || $tmpPos >= $pos) {
                continue;
            }

            $pos = $tmpPos;
            $token = $this->options[$type][0];

            // Look only at the bytes directly after the opening tag instead of
            // searching the remainder of the template for the trim marker.
            $trimBlock = '' !== $trimMarker
                && substr($this->code, $pos + strlen($token), strlen($trimMarker)) === $trimMarker;
            $append = $trimBlock ? $trimMarker : '';
        }

        // if no matches are left we return the rest of the template as simple text token
        if ($pos === $this->end) {
            $this->pushToken(Twig_Token::TEXT_TYPE, substr($this->code, $this->cursor));
            $this->cursor = $this->end;

            return;
        }

        // push the template text first
        $text = $textContent = substr($this->code, $this->cursor, $pos - $this->cursor);
        if (true === $trimBlock) {
            $text = rtrim($text);
        }
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        // the cursor skips the untrimmed text, the opening tag and the trim marker
        $this->moveCursor($textContent.$token.$append);

        switch ($token) {
            case $this->options['tag_comment'][0]:
                $this->lexComment();
                break;

            case $this->options['tag_block'][0]:
                $blockEnd = preg_quote($this->options['tag_block'][1], '/');

                // raw data?
                if (preg_match('/\s*raw\s*'.$blockEnd.'/As', $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lexRawData();
                    $this->state = self::STATE_DATA;
                // {% line \d+ %}
                } elseif (preg_match('/\s*line\s+(\d+)\s*'.$blockEnd.'/As', $this->code, $match, 0, $this->cursor)) {
                    $this->moveCursor($match[0]);
                    $this->lineno = (int) $match[1];
                    $this->state = self::STATE_DATA;
                } else {
                    $this->pushToken(Twig_Token::BLOCK_START_TYPE);
                    $this->state = self::STATE_BLOCK;
                }
                break;

            case $this->options['tag_variable'][0]:
                $this->pushToken(Twig_Token::VAR_START_TYPE);
                $this->state = self::STATE_VAR;
                break;
        }
    }

    /**
     * Lexes the inside of a block tag: either its closing delimiter (only
     * when no bracket is open) or the next expression token.
     */
    protected function lexBlock()
    {
        $trimTag = preg_quote($this->options['whitespace_trim'].$this->options['tag_block'][1], '/');
        $endTag = preg_quote($this->options['tag_block'][1], '/');

        if (empty($this->brackets) && preg_match('/\s*(?:'.$trimTag.'\s*|\s*'.$endTag.')\n?/A', $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::BLOCK_END_TYPE);
            $this->moveCursor($match[0]);
            $this->state = self::STATE_DATA;
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes the inside of a variable tag: either its closing delimiter (only
     * when no bracket is open) or the next expression token.
     */
    protected function lexVar()
    {
        $trimTag = preg_quote($this->options['whitespace_trim'].$this->options['tag_variable'][1], '/');
        $endTag = preg_quote($this->options['tag_variable'][1], '/');

        if (empty($this->brackets) && preg_match('/\s*'.$trimTag.'\s*|\s*'.$endTag.'/A', $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::VAR_END_TYPE);
            $this->moveCursor($match[0]);
            $this->state = self::STATE_DATA;
        } else {
            $this->lexExpression();
        }
    }

    /**
     * Lexes a single expression token at the cursor (after skipping leading
     * whitespace): operator, name, number, punctuation or string.
     *
     * @throws Twig_Error_Syntax On end of input, unbalanced brackets or an unlexable character
     */
    protected function lexExpression()
    {
        // whitespace
        if (preg_match('/\s+/A', $this->code, $match, 0, $this->cursor)) {
            $this->moveCursor($match[0]);

            if ($this->cursor >= $this->end) {
                throw new Twig_Error_Syntax(sprintf('Unexpected end of file: Unclosed "%s"', $this->state === self::STATE_BLOCK ? 'block' : 'variable'), $this->lineno, $this->filename);
            }
        }

        // operators
        if (preg_match($this->getOperatorRegex(), $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::OPERATOR_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // names
        elseif (preg_match(self::REGEX_NAME, $this->code, $match, 0, $this->cursor)) {
            $this->pushToken(Twig_Token::NAME_TYPE, $match[0]);
            $this->moveCursor($match[0]);
        }
        // numbers
        elseif (preg_match(self::REGEX_NUMBER, $this->code, $match, 0, $this->cursor)) {
            // integers are kept as int only when they fit; larger literals stay
            // floats instead of overflowing the int cast
            $number = (float) $match[0];
            if (ctype_digit($match[0]) && $number <= PHP_INT_MAX) {
                $number = (int) $match[0];
            }
            $this->pushToken(Twig_Token::NUMBER_TYPE, $number);
            $this->moveCursor($match[0]);
        }
        // punctuation
        elseif (false !== strpos(self::PUNCTUATION, $this->code[$this->cursor])) {
            $char = $this->code[$this->cursor];

            // opening bracket
            if (false !== strpos('([{', $char)) {
                $this->brackets[] = array($char, $this->lineno);
            }
            // closing bracket
            elseif (false !== strpos(')]}', $char)) {
                if (empty($this->brackets)) {
                    throw new Twig_Error_Syntax(sprintf('Unexpected "%s"', $char), $this->lineno, $this->filename);
                }

                list($expect, $lineno) = array_pop($this->brackets);
                if ($char != strtr($expect, '([{', ')]}')) {
                    throw new Twig_Error_Syntax(sprintf('Unclosed "%s"', $expect), $lineno, $this->filename);
                }
            }

            $this->pushToken(Twig_Token::PUNCTUATION_TYPE, $char);
            ++$this->cursor;
        }
        // strings
        elseif (preg_match(self::REGEX_STRING, $this->code, $match, 0, $this->cursor)) {
            // strip the surrounding quotes, then resolve backslash escapes
            $this->pushToken(Twig_Token::STRING_TYPE, stripcslashes(substr($match[0], 1, -1)));
            $this->moveCursor($match[0]);
        }
        // unlexable
        else {
            throw new Twig_Error_Syntax(sprintf('Unexpected character "%s"', $this->code[$this->cursor]), $this->lineno, $this->filename);
        }
    }

    /**
     * Emits everything up to the matching "endraw" block as one TEXT token
     * and moves the cursor past that closing block.
     *
     * @throws Twig_Error_Syntax When no "endraw" block follows the cursor
     */
    protected function lexRawData()
    {
        $endRawRegex = '/'.preg_quote($this->options['tag_block'][0], '/').'\s*endraw\s*'.preg_quote($this->options['tag_block'][1], '/').'/s';

        if (!preg_match($endRawRegex, $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unexpected end of file: Unclosed "block"', $this->lineno, $this->filename);
        }

        // $match[0][1] is the offset of the closing tag within $this->code
        $text = substr($this->code, $this->cursor, $match[0][1] - $this->cursor);
        $this->pushToken(Twig_Token::TEXT_TYPE, $text);
        $this->moveCursor($text.$match[0][0]);
    }

    /**
     * Skips a comment: moves the cursor past the comment end tag without
     * emitting any token.
     *
     * @throws Twig_Error_Syntax When the comment is never closed
     */
    protected function lexComment()
    {
        $commentEndRegex = '/(?:'.preg_quote($this->options['whitespace_trim'], '/')
                           .preg_quote($this->options['tag_comment'][1], '/').'\s*|'
                           .preg_quote($this->options['tag_comment'][1], '/').')\n?/s';

        if (!preg_match($commentEndRegex, $this->code, $match, PREG_OFFSET_CAPTURE, $this->cursor)) {
            throw new Twig_Error_Syntax('Unclosed comment', $this->lineno, $this->filename);
        }

        // skip the comment body together with the matched end tag
        $this->moveCursor(substr($this->code, $this->cursor, $match[0][1] - $this->cursor).$match[0][0]);
    }

    /**
     * Appends a token carrying the current line number.
     *
     * @param int   $type  One of the Twig_Token::*_TYPE constants
     * @param mixed $value The token value
     */
    protected function pushToken($type, $value = '')
    {
        // do not push empty text tokens
        if (Twig_Token::TEXT_TYPE === $type && '' === $value) {
            return;
        }

        $this->tokens[] = new Twig_Token($type, $value, $this->lineno);
    }

    /**
     * Advances the cursor past the given text and bumps the line counter by
     * the number of newlines the text contains.
     *
     * @param string $text The text that has just been consumed
     */
    protected function moveCursor($text)
    {
        $this->cursor += strlen($text);
        $this->lineno += substr_count($text, "\n");
    }

    /**
     * Builds (once) and returns the anchored regex matching "=" and every
     * unary/binary operator of the environment, longest operators first.
     *
     * @return string The operator regex
     */
    protected function getOperatorRegex()
    {
        if (null !== $this->operatorRegex) {
            return $this->operatorRegex;
        }

        $operators = array_merge(
            array('='),
            array_keys($this->env->getUnaryOperators()),
            array_keys($this->env->getBinaryOperators())
        );

        // sort by length, longest first, so that the alternation prefers
        // the longest operator starting at the cursor
        $operators = array_combine($operators, array_map('strlen', $operators));
        arsort($operators);

        $regex = array();
        foreach ($operators as $operator => $length) {
            // an operator that ends with a character must be followed by
            // a whitespace or a parenthesis
            if (ctype_alpha($operator[$length - 1])) {
                $regex[] = preg_quote($operator, '/').'(?=[\s()])';
            } else {
                $regex[] = preg_quote($operator, '/');
            }
        }

        return $this->operatorRegex = '/'.implode('|', $regex).'/A';
    }
}