|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Markup |
|
17 * @subpackage Parser |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: Bbcode.php 21127 2010-02-21 15:35:03Z kokx $ |
|
21 */ |
|
22 |
|
23 /** |
|
24 * @see Zend_Markup_TokenList |
|
25 */ |
|
26 require_once 'Zend/Markup/TokenList.php'; |
|
27 |
|
28 /** |
|
29 * @see Zend_Markup_Parser_ParserInterface |
|
30 */ |
|
31 require_once 'Zend/Markup/Parser/ParserInterface.php'; |
|
32 |
|
/**
 * BBCode parser.
 *
 * Parsing is done in two passes: {@link _tokenize()} splits the source string
 * into a flat list of text and tag tokens, and {@link _createTree()} turns that
 * list into a tree of Zend_Markup_Token objects held by a Zend_Markup_TokenList.
 *
 * @category   Zend
 * @package    Zend_Markup
 * @subpackage Parser
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Markup_Parser_Bbcode implements Zend_Markup_Parser_ParserInterface
{
    /**
     * Pseudo tag that every line break in the source is replaced with
     */
    const NEWLINE = "[newline\0]";

    // there is a parsing difference between the default tags and single tags
    const TYPE_DEFAULT = 'default';
    const TYPE_SINGLE  = 'single';

    /**
     * Body of the regex character class used for tag and attribute names
     */
    const NAME_CHARSET = '^\[\]=\s';

    // tokenizer states
    const STATE_SCAN       = 0;
    const STATE_SCANATTRS  = 1;
    const STATE_PARSEVALUE = 2;

    /**
     * Token tree
     *
     * @var Zend_Markup_TokenList
     */
    protected $_tree;

    /**
     * Current token
     *
     * @var Zend_Markup_Token
     */
    protected $_current;

    /**
     * Source to tokenize
     *
     * @var string
     */
    protected $_value = '';

    /**
     * Length of the value
     *
     * @var int
     */
    protected $_valueLen = 0;

    /**
     * Current pointer
     *
     * @var int
     */
    protected $_pointer = 0;

    /**
     * The buffer (plain text collected since the last emitted token)
     *
     * @var string
     */
    protected $_buffer = '';

    /**
     * Temporary tag storage (the tag that is currently being tokenized)
     *
     * @var array
     */
    protected $_temp;

    /**
     * Stoppers that we are searching for, as stopper => number of open tags
     * waiting for it
     *
     * @var array
     */
    protected $_searchedStoppers = array();

    /**
     * Tag information
     *
     * @var array
     */
    protected $_tags = array(
        'Zend_Markup_Root' => array(
            'type'     => self::TYPE_DEFAULT,
            'stoppers' => array(),
        ),
        '*' => array(
            'type'     => self::TYPE_DEFAULT,
            'stoppers' => array(self::NEWLINE, '[/*]', '[/]'),
        ),
        'hr' => array(
            'type'     => self::TYPE_SINGLE,
            'stoppers' => array(),
        ),
        'code' => array(
            'type'         => self::TYPE_DEFAULT,
            'stoppers'     => array('[/code]', '[/]'),
            'parse_inside' => false
        )
    );

    /**
     * Token array
     *
     * @var array
     */
    protected $_tokens = array();

    /**
     * State
     *
     * @var int
     */
    protected $_state = self::STATE_SCAN;


    /**
     * Prepare the parsing of a bbcode string, the real parsing is done in
     * {@link _tokenize()} and {@link _createTree()}
     *
     * @param  string $value
     * @return Zend_Markup_TokenList
     * @throws Zend_Markup_Parser_Exception If $value is not a string or is the empty string
     */
    public function parse($value)
    {
        if (!is_string($value)) {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
        }

        // compare against '' explicitly: empty() would also reject the valid input "0"
        if ($value === '') {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
        }

        // every kind of line break becomes the NEWLINE pseudo tag
        $this->_value = str_replace(array("\r\n", "\r", "\n"), self::NEWLINE, $value);

        // variable initialization for tokenizer
        $this->_valueLen = strlen($this->_value);
        $this->_pointer  = 0;
        $this->_buffer   = '';
        $this->_temp     = array();
        $this->_state    = self::STATE_SCAN;
        $this->_tokens   = array();

        $this->_tokenize();

        // variable initialization for treebuilder
        $this->_searchedStoppers = array();
        $this->_tree             = new Zend_Markup_TokenList();
        $this->_current          = new Zend_Markup_Token(
            '',
            Zend_Markup_Token::TYPE_NONE,
            'Zend_Markup_Root'
        );

        $this->_tree->addChild($this->_current);

        $this->_createTree();

        return $this->_tree;
    }

    /**
     * Tokenize
     *
     * Walks over $this->_value starting at $this->_pointer and fills
     * $this->_tokens with plain text tokens ('tag', 'type') and tag tokens
     * ('tag', 'name', 'attributes', 'type').
     *
     * Text that only looks like the start of a tag (malformed or unterminated)
     * is kept as plain text.
     *
     * @return void
     */
    protected function _tokenize()
    {
        $attribute = '';

        while ($this->_pointer < $this->_valueLen) {
            switch ($this->_state) {
                case self::STATE_SCAN:
                    // leading text, optionally followed by '[' and a tag name
                    $matches = array();
                    $regex   = '#\G(?<text>[^\[]*)(?<open>\[(?<name>[' . self::NAME_CHARSET . ']+)?)?#';
                    preg_match($regex, $this->_value, $matches, 0, $this->_pointer);

                    $this->_pointer += strlen($matches[0]);

                    // not empty(): a text run of exactly "0" has to be kept
                    if (isset($matches['text']) && $matches['text'] !== '') {
                        $this->_buffer .= $matches['text'];
                    }

                    if (!isset($matches['open'])) {
                        // great, no tag, we are ending the string
                        break;
                    }
                    if (!isset($matches['name'])) {
                        // a '[' without a name behind it is just text
                        $this->_buffer .= $matches['open'];
                        break;
                    }

                    $this->_temp = array(
                        'tag'        => '[' . $matches['name'],
                        'name'       => $matches['name'],
                        'attributes' => array()
                    );

                    if ($this->_pointer >= $this->_valueLen) {
                        // the input ends right behind the name, so this is no tag
                        $this->_buffer .= $this->_temp['tag'];
                        $this->_temp    = array();
                        break 2;
                    }

                    if ($this->_value[$this->_pointer] == '=') {
                        // [name=value]: the value is stored under the tag's own name
                        $this->_pointer++;

                        $this->_temp['tag'] .= '=';
                        $this->_state        = self::STATE_PARSEVALUE;
                        $attribute           = $this->_temp['name'];
                    } else {
                        $this->_state = self::STATE_SCANATTRS;
                    }
                    break;
                case self::STATE_SCANATTRS:
                    // either the closing ']' or the next attribute name
                    $matches = array();
                    $regex   = '#\G((?<end>\s*\])|\s+(?<attribute>[' . self::NAME_CHARSET . ']+)(?<eq>=?))#';
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // malformed tag: keep everything consumed so far as plain
                        // text and go on scanning instead of dropping the rest of
                        // the input
                        $this->_buffer .= $this->_temp['tag'];
                        $this->_temp    = array();
                        $this->_state   = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    if (!empty($matches['end'])) {
                        // the tag is complete; flush the text collected before it
                        if ($this->_buffer !== '') {
                            $this->_tokens[] = array(
                                'tag'  => $this->_buffer,
                                'type' => Zend_Markup_Token::TYPE_NONE
                            );
                            $this->_buffer = '';
                        }
                        $this->_temp['tag']  .= $matches['end'];
                        $this->_temp['type']  = Zend_Markup_Token::TYPE_TAG;

                        $this->_tokens[] = $this->_temp;
                        $this->_temp     = array();

                        $this->_state = self::STATE_SCAN;
                    } else {
                        // attribute name
                        $attribute = $matches['attribute'];

                        $this->_temp['tag'] .= $matches[0];

                        $this->_temp['attributes'][$attribute] = '';

                        if (empty($matches['eq'])) {
                            $this->_state = self::STATE_SCANATTRS;
                        } else {
                            $this->_state = self::STATE_PARSEVALUE;
                        }
                    }
                    break;
                case self::STATE_PARSEVALUE:
                    // a quoted value, or an unquoted one running up to whitespace or ']'
                    $matches = array();
                    $regex   = '#\G((?<quote>"|\')(?<valuequote>.*?)\\2|(?<value>[^\]\s]+))#';
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // no value present, the attribute keeps its current value
                        $this->_state = self::STATE_SCANATTRS;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    if (!empty($matches['quote'])) {
                        $this->_temp['attributes'][$attribute] = $matches['valuequote'];
                    } else {
                        $this->_temp['attributes'][$attribute] = $matches['value'];
                    }
                    $this->_temp['tag'] .= $matches[0];

                    $this->_state = self::STATE_SCANATTRS;
                    break;
            }
        }

        // the input ended in the middle of a tag: keep the consumed part as text
        if (($this->_state !== self::STATE_SCAN) && isset($this->_temp['tag'])) {
            $this->_buffer .= $this->_temp['tag'];
            $this->_temp    = array();
        }

        if ($this->_buffer !== '') {
            $this->_tokens[] = array(
                'tag'  => $this->_buffer,
                'type' => Zend_Markup_Token::TYPE_NONE
            );
        }
    }

    /**
     * Parse the token array ($this->_tokens) into a tree below $this->_current
     *
     * @return void
     */
    public function _createTree()
    {
        foreach ($this->_tokens as $token) {
            // first we want to know if this tag is a stopper, or at least a searched one
            if ($this->_isStopper($token['tag'])) {
                // walk up until we reach the tag this stopper belongs to and
                // remember the tags that were still open in between
                $oldItems = array();

                while (!in_array($token['tag'], $this->_tags[$this->_current->getName()]['stoppers'], true)) {
                    $oldItems[]     = clone $this->_current;
                    $this->_current = $this->_current->getParent();
                }

                // we found the stopper, so stop the tag
                $this->_current->setStopper($token['tag']);
                $this->_removeFromSearchedStoppers($this->_current);
                $this->_current = $this->_current->getParent();

                // re-open the tags that were still open, outermost first
                if (!empty($oldItems)) {
                    foreach (array_reverse($oldItems) as $item) {
                        /* @var $item Zend_Markup_Token */
                        $this->_current->addChild($item);
                        $item->setParent($this->_current);
                        $this->_current = $item;
                    }
                }
            } else {
                if ($token['type'] == Zend_Markup_Token::TYPE_TAG) {
                    if ($token['tag'] == self::NEWLINE) {
                        // this is a newline tag, add it as a token
                        $this->_current->addChild(new Zend_Markup_Token(
                            "\n",
                            Zend_Markup_Token::TYPE_NONE,
                            '',
                            array(),
                            $this->_current
                        ));
                    } elseif (isset($token['name']) && ($token['name'][0] == '/')) {
                        // this is a stopper nobody is waiting for, add it as plain text
                        $this->_current->addChild(new Zend_Markup_Token(
                            $token['tag'],
                            Zend_Markup_Token::TYPE_NONE,
                            '',
                            array(),
                            $this->_current
                        ));
                    } elseif (isset($this->_tags[$this->_current->getName()]['parse_inside'])
                        && !$this->_tags[$this->_current->getName()]['parse_inside']
                    ) {
                        // the current tag does not parse its content, keep the tag as plain text
                        $this->_current->addChild(new Zend_Markup_Token(
                            $token['tag'],
                            Zend_Markup_Token::TYPE_NONE,
                            '',
                            array(),
                            $this->_current
                        ));
                    } else {
                        // add the tag
                        $child = new Zend_Markup_Token(
                            $token['tag'],
                            $token['type'],
                            $token['name'],
                            $token['attributes'],
                            $this->_current
                        );
                        $this->_current->addChild($child);

                        // only default tags get content and are waiting for a stopper
                        if ($this->_getType($token['name']) == self::TYPE_DEFAULT) {
                            $this->_current = $child;

                            $this->_addToSearchedStoppers($this->_current);
                        }
                    }
                } else {
                    // no tag, just add it as a simple token
                    $this->_current->addChild(new Zend_Markup_Token(
                        $token['tag'],
                        Zend_Markup_Token::TYPE_NONE,
                        '',
                        array(),
                        $this->_current
                    ));
                }
            }
        }
    }

    /**
     * Check if there is a tag declaration, and if it isn't there, add it
     *
     * An undeclared tag becomes a default tag that is stopped by
     * "[/name]" or "[/]".
     *
     * @param  string $name
     * @return void
     */
    protected function _checkTagDeclaration($name)
    {
        if (!isset($this->_tags[$name])) {
            $this->_tags[$name] = array(
                'type'     => self::TYPE_DEFAULT,
                'stoppers' => array(
                    '[/' . $name . ']',
                    '[/]'
                )
            );
        }
    }

    /**
     * Check the tag's type, declaring the tag first if it is unknown
     *
     * @param  string $name
     * @return string
     */
    protected function _getType($name)
    {
        $this->_checkTagDeclaration($name);

        return $this->_tags[$name]['type'];
    }

    /**
     * Check if the tag is a stopper that a currently open tag is waiting for
     *
     * @param  string $tag
     * @return bool
     */
    protected function _isStopper($tag)
    {
        $this->_checkTagDeclaration($this->_current->getName());

        return !empty($this->_searchedStoppers[$tag]);
    }

    /**
     * Add to searched stoppers
     *
     * Increments the counter of every stopper of the given token's tag.
     *
     * @param  Zend_Markup_Token $token
     * @return void
     */
    protected function _addToSearchedStoppers(Zend_Markup_Token $token)
    {
        $this->_checkTagDeclaration($token->getName());

        foreach ($this->_tags[$token->getName()]['stoppers'] as $stopper) {
            if (!isset($this->_searchedStoppers[$stopper])) {
                $this->_searchedStoppers[$stopper] = 0;
            }
            ++$this->_searchedStoppers[$stopper];
        }
    }

    /**
     * Remove from searched stoppers
     *
     * Decrements the counter of every stopper of the given token's tag.
     *
     * @param  Zend_Markup_Token $token
     * @return void
     */
    protected function _removeFromSearchedStoppers(Zend_Markup_Token $token)
    {
        $this->_checkTagDeclaration($token->getName());

        foreach ($this->_tags[$token->getName()]['stoppers'] as $stopper) {
            --$this->_searchedStoppers[$stopper];
        }
    }

}