|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Markup |
|
17 * @subpackage Parser |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: Textile.php 20277 2010-01-14 14:17:12Z kokx $ |
|
21 */ |
|
22 |
|
23 /** |
|
24 * @see Zend_Markup_TokenList |
|
25 */ |
|
26 require_once 'Zend/Markup/TokenList.php'; |
|
27 |
|
28 /** |
|
29 * @see Zend_Markup_Parser_ParserInterface |
|
30 */ |
|
31 require_once 'Zend/Markup/Parser/ParserInterface.php'; |
|
32 |
|
/**
 * Textile parser for Zend_Markup.
 *
 * Parsing happens in two passes: {@link _tokenize()} turns the source string
 * into a flat list of token arrays, and {@link _createTree()} nests those
 * tokens into a Zend_Markup_TokenList.
 *
 * @category   Zend
 * @package    Zend_Markup
 * @subpackage Parser
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Markup_Parser_Textile implements Zend_Markup_Parser_ParserInterface
{

    /**
     * Tokenizer state: scanning inline content for text, newlines and tags
     */
    const STATE_SCAN          = 0;

    /**
     * Tokenizer state: at the start of a paragraph (start of input or after
     * two or more newlines)
     */
    const STATE_NEW_PARAGRAPH = 1;

    /**
     * Tokenizer state: directly after a single newline
     */
    const STATE_NEWLINE       = 2;

    /**
     * Regex fragments matching the optional attribute modifiers of a tag:
     * "(class#id)", "{style}", "[lang]" and the alignment markers
     */
    const MATCH_ATTR_CLASSID = '\((?<attr_class>[a-zA-Z0-9_]+)?(?:\#(?<attr_id>[a-zA-Z0-9_]+))?\)';
    const MATCH_ATTR_STYLE   = "\{(?<attr_style>[^\}\n]+)\}";
    const MATCH_ATTR_LANG    = '\[(?<attr_lang>[a-zA-Z_]+)\]';
    const MATCH_ATTR_ALIGN   = '(?<attr_align>\<\>?|\>|=)';

    /**
     * Token tree
     *
     * @var Zend_Markup_TokenList
     */
    protected $_tree;

    /**
     * Current token
     *
     * @var Zend_Markup_Token
     */
    protected $_current;

    /**
     * Source to tokenize
     *
     * @var string
     */
    protected $_value = '';

    /**
     * Length of the value in bytes
     *
     * The pointer is advanced with strlen() and used as the preg_match()
     * offset, both of which count bytes, so this has to be a byte length too.
     *
     * @var int
     */
    protected $_valueLen = 0;

    /**
     * Current pointer (byte offset into the value)
     *
     * @var int
     */
    protected $_pointer = 0;

    /**
     * The buffer
     *
     * @var string
     */
    protected $_buffer = '';

    /**
     * Pending paragraph token, kept between tokenizer states until it is
     * known whether an explicit block marker follows
     *
     * @var array
     */
    protected $_temp = array();

    /**
     * Simple tag translation
     *
     * @var array
     */
    protected $_simpleTags = array(
        '*'  => 'strong',
        '**' => 'bold',
        '_'  => 'emphasized',
        '__' => 'italic',
        '??' => 'citation',
        '-'  => 'deleted',
        '+'  => 'insert',
        '^'  => 'superscript',
        '~'  => 'subscript',
        '%'  => 'span',
        // these are a little more complicated
        '@'  => 'code',
        '!'  => 'img',
    );

    /**
     * Token array
     *
     * @var array
     */
    protected $_tokens = array();


    /**
     * Prepare the parsing of a Textile string, the real parsing is done in
     * {@link _tokenize()} and {@link _createTree()}
     *
     * @param  string $value
     *
     * @throws Zend_Markup_Parser_Exception if the value is not a string or is an empty string
     *
     * @return Zend_Markup_TokenList
     */
    public function parse($value)
    {
        if (!is_string($value)) {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
        }
        // compare against '' explicitly: empty() would also reject the valid input "0"
        if ($value === '') {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
        }

        // first make sure we only have LF newlines, also trim the value
        $this->_value = str_replace(array("\r\n", "\r"), "\n", $value);
        $this->_value = trim($this->_value);

        // initialize variables and tokenize; the length is a byte length
        // because the pointer is a byte offset
        $this->_valueLen = strlen($this->_value);
        $this->_pointer  = 0;
        $this->_buffer   = '';
        $this->_temp     = array();
        $this->_tokens   = array();

        $this->_tokenize();

        // create the tree
        $this->_tree     = new Zend_Markup_TokenList();

        $this->_current  = new Zend_Markup_Token('', Zend_Markup_Token::TYPE_NONE, 'Zend_Markup_Root');
        $this->_tree->addChild($this->_current);

        $this->_createTree();

        return $this->_tree;
    }

    /**
     * Tokenize a textile string
     *
     * Walks over the value with a small state machine and appends token
     * arrays to {@link $_tokens}. Text tokens have the keys 'tag' and 'type',
     * tag tokens additionally have 'name' and 'attributes'.
     *
     * @return void
     */
    protected function _tokenize()
    {
        $state = self::STATE_NEW_PARAGRAPH;

        $attrsMatch = implode('|', array(
            self::MATCH_ATTR_CLASSID,
            self::MATCH_ATTR_STYLE,
            self::MATCH_ATTR_LANG,
            self::MATCH_ATTR_ALIGN
        ));

        // the patterns do not change while tokenizing, so build them once
        $acronym   = '(?<acronym>[A-Z]{2,})\((?<title>[^\)]+)\)';
        $scanRegex = '#\G(?<text>.*?)(?:'
                   . "(?:(?<nl_paragraph>\n{2,})|(?<nl_break>\n))|"
                   . '(?<tag>'
                   . "(?<name>\*{1,2}|_{1,2}|\?{2}|\-|\+|\~|\^|%|@|!|$|{$acronym}"
                   . '|":(?<url>[^\s]+)|")'
                   . "(?:{$attrsMatch})*)"
                   . ')#si';
        $paragraphRegex = "#\G(?<name>(h[1-6]|p)|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#i";
        $newlineRegex   = "#\G(?<name>(h[1-6])|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#si";

        while ($this->_pointer < $this->_valueLen) {
            switch ($state) {
                case self::STATE_SCAN:
                    $matches = array();
                    $found   = preg_match($scanRegex, $this->_value, $matches, 0, $this->_pointer);

                    if (($found !== 1) || ($matches[0] === '')) {
                        // The match failed (e.g. a PCRE limit was hit) or did
                        // not consume anything. Treat the rest of the input as
                        // plain text so the loop is guaranteed to terminate.
                        $matches         = array();
                        $this->_buffer  .= substr($this->_value, $this->_pointer);
                        $this->_pointer  = $this->_valueLen;
                    } else {
                        $this->_pointer += strlen($matches[0]);
                        $this->_buffer  .= $matches['text'];
                    }

                    // first add the buffer; compared against '' so that the text "0" is kept
                    if ($this->_buffer !== '') {
                        $this->_tokens[] = array(
                            'tag'  => $this->_buffer,
                            'type' => Zend_Markup_Token::TYPE_NONE
                        );
                        $this->_buffer = '';
                    }

                    if ($this->_hasMatch($matches, 'nl_paragraph')) {
                        // remember the paragraph break, STATE_NEW_PARAGRAPH decides what to emit
                        $this->_temp = array(
                            'tag'        => $matches['nl_paragraph'],
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $state = self::STATE_NEW_PARAGRAPH;
                    } elseif ($this->_hasMatch($matches, 'nl_break')) {
                        $this->_tokens[] = array(
                            'tag'        => $matches['nl_break'],
                            'name'       => 'break',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $state = self::STATE_NEWLINE;
                    } elseif ($this->_hasMatch($matches, 'tag')) {
                        if (isset($this->_simpleTags[$matches['name']])) {
                            // a simple inline tag
                            $this->_tokens[] = array(
                                'tag'        => $matches['tag'],
                                'type'       => Zend_Markup_Token::TYPE_TAG,
                                'name'       => $this->_simpleTags[$matches['name']],
                                'attributes' => $this->_extractAttributes($matches)
                            );
                        } elseif ($matches['tag'][0] == '"') {
                            // a link: either the opening quote or the closing '":url'
                            $attributes = $this->_extractAttributes($matches);
                            if (isset($matches['url'])) {
                                $attributes['url'] = $matches['url'];
                            }
                            $this->_tokens[] = array(
                                'tag'        => $matches['tag'],
                                'type'       => Zend_Markup_Token::TYPE_TAG,
                                'name'       => 'url',
                                'attributes' => $attributes
                            );
                        } else {
                            // an acronym is emitted as: opening tag, the
                            // acronym text, and a closing tag holding "(title)"
                            $this->_tokens[] = array(
                                'tag'        => '',
                                'type'       => Zend_Markup_Token::TYPE_TAG,
                                'name'       => 'acronym',
                                'attributes' => array(
                                    'title' => $matches['title']
                                )
                            );
                            $this->_tokens[] = array(
                                'tag'  => $matches['acronym'],
                                'type' => Zend_Markup_Token::TYPE_NONE
                            );
                            $this->_tokens[] = array(
                                'tag'        => '(' . $matches['title'] . ')',
                                'type'       => Zend_Markup_Token::TYPE_TAG,
                                'name'       => 'acronym',
                                'attributes' => array()
                            );
                        }
                    }

                    break;
                case self::STATE_NEW_PARAGRAPH:
                    if (empty($this->_temp)) {
                        // start of the input: open an implicit paragraph
                        $this->_temp = array(
                            'tag'        => '',
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );
                    } else {
                        // the first newline of the break becomes its own 'p'
                        // token, the remaining newlines stay on the pending one
                        $this->_tokens[] = array(
                            'tag'        => "\n",
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );
                        $this->_temp['tag'] = substr($this->_temp['tag'], 1);
                    }

                    $matches = array();
                    if (!preg_match($paragraphRegex, $this->_value, $matches, 0, $this->_pointer)) {
                        // no explicit block marker, emit the plain paragraph
                        $this->_tokens[] = $this->_temp;
                        $state = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    // the regex is case-insensitive, so compare the lower-cased name
                    if (strtolower($matches['name']) == 'p') {
                        // explicit "p." marker: merge it into the pending paragraph
                        $this->_temp['tag']       .= $matches[0];
                        $this->_temp['attributes'] = $this->_extractAttributes($matches);

                        $this->_tokens[] = $this->_temp;
                        $this->_temp     = array();
                    } else {
                        // heading or list marker: emit the paragraph, then the block token
                        $this->_tokens[] = $this->_temp;
                        $this->_temp     = array();

                        $this->_tokens[] = $this->_createBlockToken($matches);
                    }

                    $state = self::STATE_SCAN;
                    break;
                case self::STATE_NEWLINE:
                    $matches = array();
                    if (!preg_match($newlineRegex, $this->_value, $matches, 0, $this->_pointer)) {
                        $state = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    // the state is left unchanged, so the next iteration tries
                    // this regex again before falling back to STATE_SCAN
                    $this->_tokens[] = $this->_createBlockToken($matches);
                    break;
            }
        }
    }

    /**
     * Build the token for a heading or list marker at the start of a line
     *
     * '#' and '*' markers become 'list' tokens ('#' additionally gets the
     * attribute list => decimal), heading names are lower-cased because the
     * marker regexes are case-insensitive.
     *
     * @param  array $matches Result of matching one of the block-marker regexes
     *
     * @return array
     */
    protected function _createBlockToken(array $matches)
    {
        $name       = strtolower($matches['name']);
        $attributes = $this->_extractAttributes($matches);

        if ($name == '#') {
            $name               = 'list';
            $attributes['list'] = 'decimal';
        } elseif ($name == '*') {
            $name = 'list';
        }

        return array(
            'tag'        => $matches[0],
            'name'       => $name,
            'type'       => Zend_Markup_Token::TYPE_TAG,
            'attributes' => $attributes
        );
    }

    /**
     * Check if a capture group took part in the match with a non-empty value
     *
     * Unlike empty() this keeps the value "0".
     *
     * @param  array  $matches
     * @param  string $group
     *
     * @return bool
     */
    protected function _hasMatch(array $matches, $group)
    {
        return isset($matches[$group]) && ($matches[$group] !== '');
    }

    /**
     * Create a tree from the tokenized text
     *
     * @return void
     */
    protected function _createTree()
    {
        // set to false once an img or url tag is opened; until the next
        // stopper every following tag is then added as plain text
        $inside = true;

        foreach ($this->_tokens as $key => $token) {
            // first check if the token is a stopper
            if ($this->_isStopper($token, $this->_current)) {
                if ($this->_current->getName() == 'li') {
                    // list items are handled differently
                    $this->_current->setStopper($token['tag']);

                    $next = isset($this->_tokens[$key + 1]) ? $this->_tokens[$key + 1] : null;

                    if (($next !== null)
                        && ($next['type'] == Zend_Markup_Token::TYPE_TAG)
                        && ($next['name'] == 'list')
                    ) {
                        // another list item follows, stay inside the list
                        $this->_current = $this->_current->getParent();
                    } else {
                        // close the list
                        $this->_current = $this->_current->getParent()->getParent();

                        // go up in the tree until we found the end
                        while ($this->_isStopper($token, $this->_current)) {
                            $this->_current->setStopper($token['tag']);

                            $this->_current = $this->_current->getParent();
                        }
                    }
                } else {
                    // go up in the tree until we found the end of stoppers
                    while ($this->_isStopper($token, $this->_current)) {
                        $this->_current->setStopper($token['tag']);

                        if (!empty($token['attributes'])) {
                            foreach ($token['attributes'] as $name => $value) {
                                $this->_current->addAttribute($name, $value);
                            }
                        }

                        $this->_current = $this->_current->getParent();
                    }
                }
                $inside = true;
                continue;
            }

            if (($token['type'] != Zend_Markup_Token::TYPE_TAG) || !$inside) {
                // simply add the token as text
                $this->_current->addChild(new Zend_Markup_Token(
                    $token['tag'],
                    Zend_Markup_Token::TYPE_NONE,
                    '',
                    array(),
                    $this->_current
                ));
                continue;
            }

            if ($token['name'] == 'break') {
                // add the newline and continue parsing
                $this->_current->addChild(new Zend_Markup_Token(
                    $token['tag'],
                    Zend_Markup_Token::TYPE_NONE,
                    '',
                    array(),
                    $this->_current
                ));
                continue;
            }

            if ($token['name'] == 'list') {
                // handle a list item: the 'list' attribute belongs to the
                // list itself, not to the item
                $attributes = array();
                if (isset($token['attributes']['list'])) {
                    $attributes['list'] = $token['attributes']['list'];
                    unset($token['attributes']['list']);
                }

                if ($this->_current->getName() != 'list') {
                    // the list isn't started yet, create it
                    $list = new Zend_Markup_Token(
                        '',
                        Zend_Markup_Token::TYPE_TAG,
                        'list',
                        $attributes,
                        $this->_current
                    );

                    $this->_current->addChild($list);

                    $this->_current = $list;
                }
                $token['name'] = 'li';
            } elseif (($token['name'] == 'img') || ($token['name'] == 'url')) {
                $inside = false;
            }

            // add the token and descend into it
            $child = new Zend_Markup_Token(
                $token['tag'],
                Zend_Markup_Token::TYPE_TAG,
                $token['name'],
                $token['attributes'],
                $this->_current
            );

            $this->_current->addChild($child);

            $this->_current = $child;
        }
    }

    /**
     * Check if a tag is a stopper
     *
     * Headings, lists and list items are stopped by a 'break' or 'p' tag, a
     * 'break' is never stopped, and every other token is stopped by a tag
     * with the same name.
     *
     * @param  array $token
     * @param  Zend_Markup_Token $current
     *
     * @return bool
     */
    protected function _isStopper(array $token, Zend_Markup_Token $current)
    {
        // only tags can stop anything
        if ($token['type'] != Zend_Markup_Token::TYPE_TAG) {
            return false;
        }

        switch ($current->getName()) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
            case 'list':
            case 'li':
                return ($token['name'] == 'break') || ($token['name'] == 'p');
            case 'break':
                return false;
            default:
                return $token['name'] == $current->getName();
        }
    }

    /**
     * Extract the attributes
     *
     * Reads the attr_* capture groups and translates them into the
     * attributes 'class', 'id', 'style', 'lang' and 'align'.
     *
     * @param  array $matches
     *
     * @return array
     */
    protected function _extractAttributes(array $matches)
    {
        $attributes = array();

        $groups = array(
            'attr_class' => 'class',
            'attr_id'    => 'id',
            'attr_style' => 'style',
            'attr_lang'  => 'lang',
        );

        foreach ($groups as $group => $attribute) {
            if ($this->_hasMatch($matches, $group)) {
                $attributes[$attribute] = $matches[$group];
            }
        }

        if ($this->_hasMatch($matches, 'attr_align')) {
            switch ($matches['attr_align']) {
                case '=':
                    $attributes['align'] = 'center';
                    break;
                case '>':
                    $attributes['align'] = 'right';
                    break;
                case '<>':
                    $attributes['align'] = 'justify';
                    break;
                case '<':
                default:
                    $attributes['align'] = 'left';
                    break;
            }
        }

        return $attributes;
    }

}