web/enmi/Zend/Markup/Parser/Textile.php
changeset 19 1c2f13fd785c
parent 0 4eba9c11703f
equal deleted inserted replaced
18:bd595ad770fc 19:1c2f13fd785c
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Markup
       
    17  * @subpackage Parser
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: Textile.php 20277 2010-01-14 14:17:12Z kokx $
       
    21  */
       
    22 
       
    23 /**
       
    24  * @see Zend_Markup_TokenList
       
    25  */
       
    26 require_once 'Zend/Markup/TokenList.php';
       
    27 
       
    28 /**
       
    29  * @see Zend_Markup_Parser_ParserInterface
       
    30  */
       
    31 require_once 'Zend/Markup/Parser/ParserInterface.php';
       
    32 
       
    33 /**
       
    34  * @category   Zend
       
    35  * @package    Zend_Markup
       
    36  * @subpackage Parser
       
    37  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    38  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    39  */
       
    40 class Zend_Markup_Parser_Textile implements Zend_Markup_Parser_ParserInterface
       
    41 {
       
    42 
       
    43     const STATE_SCAN          = 0;
       
    44     const STATE_NEW_PARAGRAPH = 1;
       
    45     const STATE_NEWLINE       = 2;
       
    46 
       
    47     const MATCH_ATTR_CLASSID = '\((?<attr_class>[a-zA-Z0-9_]+)?(?:\#(?<attr_id>[a-zA-Z0-9_]+))?\)';
       
    48     const MATCH_ATTR_STYLE   = "\{(?<attr_style>[^\}\n]+)\}";
       
    49     const MATCH_ATTR_LANG    = '\[(?<attr_lang>[a-zA-Z_]+)\]';
       
    50     const MATCH_ATTR_ALIGN   = '(?<attr_align>\<\>?|\>|=)';
       
    51 
       
    52 
       
    53 
       
    54     /**
       
    55      * Token tree
       
    56      *
       
    57      * @var Zend_Markup_TokenList
       
    58      */
       
    59     protected $_tree;
       
    60 
       
    61     /**
       
    62      * Current token
       
    63      *
       
    64      * @var Zend_Markup_Token
       
    65      */
       
    66     protected $_current;
       
    67 
       
    68     /**
       
    69      * Source to tokenize
       
    70      *
       
    71      * @var string
       
    72      */
       
    73     protected $_value = '';
       
    74 
       
    75     /**
       
    76      * Length of the value
       
    77      *
       
    78      * @var int
       
    79      */
       
    80     protected $_valueLen = 0;
       
    81 
       
    82     /**
       
    83      * Current pointer
       
    84      *
       
    85      * @var int
       
    86      */
       
    87     protected $_pointer = 0;
       
    88 
       
    89     /**
       
    90      * The buffer
       
    91      *
       
    92      * @var string
       
    93      */
       
    94     protected $_buffer = '';
       
    95 
       
    96     /**
       
    97      * Simple tag translation
       
    98      *
       
    99      * @var array
       
   100      */
       
   101     protected $_simpleTags = array(
       
   102         '*'  => 'strong',
       
   103         '**' => 'bold',
       
   104         '_'  => 'emphasized',
       
   105         '__' => 'italic',
       
   106         '??' => 'citation',
       
   107         '-'  => 'deleted',
       
   108         '+'  => 'insert',
       
   109         '^'  => 'superscript',
       
   110         '~'  => 'subscript',
       
   111         '%'  => 'span',
       
   112         // these are a little more complicated
       
   113         '@'  => 'code',
       
   114         '!'  => 'img',
       
   115     );
       
   116 
       
   117     /**
       
   118      * Token array
       
   119      *
       
   120      * @var array
       
   121      */
       
   122     protected $_tokens = array();
       
   123 
       
   124 
       
   125     /**
       
   126      * Prepare the parsing of a Textile string, the real parsing is done in {@link _parse()}
       
   127      *
       
   128      * @param string $value
       
   129      *
       
   130      * @return array
       
   131      */
       
   132     public function parse($value)
       
   133     {
       
   134         if (!is_string($value)) {
       
   135             /**
       
   136              * @see Zend_Markup_Parser_Exception
       
   137              */
       
   138             require_once 'Zend/Markup/Parser/Exception.php';
       
   139             throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
       
   140         }
       
   141         if (empty($value)) {
       
   142             /**
       
   143              * @see Zend_Markup_Parser_Exception
       
   144              */
       
   145             require_once 'Zend/Markup/Parser/Exception.php';
       
   146             throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
       
   147         }
       
   148 
       
   149         // first make we only have LF newlines, also trim the value
       
   150         $this->_value = str_replace(array("\r\n", "\r"), "\n", $value);
       
   151         $this->_value = trim($this->_value);
       
   152 
       
   153         // initialize variables and tokenize
       
   154         $this->_valueLen = iconv_strlen($this->_value, 'UTF-8');
       
   155         $this->_pointer  = 0;
       
   156         $this->_buffer   = '';
       
   157         $this->_temp     = array();
       
   158         $this->_tokens   = array();
       
   159 
       
   160         $this->_tokenize();
       
   161 
       
   162         // create the tree
       
   163         $this->_tree     = new Zend_Markup_TokenList();
       
   164 
       
   165         $this->_current  = new Zend_Markup_Token('', Zend_Markup_Token::TYPE_NONE, 'Zend_Markup_Root');
       
   166         $this->_tree->addChild($this->_current);
       
   167 
       
   168         $this->_createTree();
       
   169 
       
   170         return $this->_tree;
       
   171     }
       
   172 
       
   173     /**
       
   174      * Tokenize a textile string
       
   175      *
       
   176      * @return array
       
   177      */
       
   178     protected function _tokenize()
       
   179     {
       
   180         $state    = self::STATE_NEW_PARAGRAPH;
       
   181 
       
   182         $attrsMatch = implode('|', array(
       
   183             self::MATCH_ATTR_CLASSID,
       
   184             self::MATCH_ATTR_STYLE,
       
   185             self::MATCH_ATTR_LANG,
       
   186             self::MATCH_ATTR_ALIGN
       
   187         ));
       
   188 
       
   189         $paragraph = '';
       
   190 
       
   191         while ($this->_pointer < $this->_valueLen) {
       
   192             switch ($state) {
       
   193                 case self::STATE_SCAN:
       
   194                     $matches = array(); //[^\n*_?+~%@!-]
       
   195                     $acronym = '(?<acronym>[A-Z]{2,})\((?<title>[^\)]+)\)';
       
   196                     $regex   = '#\G(?<text>.*?)(?:'
       
   197                              . "(?:(?<nl_paragraph>\n{2,})|(?<nl_break>\n))|"
       
   198                              . '(?<tag>'
       
   199                              . "(?<name>\*{1,2}|_{1,2}|\?{2}|\-|\+|\~|\^|%|@|!|$|{$acronym}"
       
   200                              . '|":(?<url>[^\s]+)|")'
       
   201                              . "(?:{$attrsMatch})*)"
       
   202                              . ')#si';
       
   203                     preg_match($regex, $this->_value, $matches, null, $this->_pointer);
       
   204 
       
   205                     $this->_pointer += strlen($matches[0]);
       
   206 
       
   207                     if (!empty($matches['text'])) {
       
   208                         $this->_buffer .= $matches['text'];
       
   209                     }
       
   210 
       
   211                     // first add the buffer
       
   212                     if (!empty($this->_buffer)) {
       
   213                         $this->_tokens[] = array(
       
   214                             'tag'  => $this->_buffer,
       
   215                             'type' => Zend_Markup_Token::TYPE_NONE
       
   216                         );
       
   217                         $this->_buffer = '';
       
   218                     }
       
   219 
       
   220                     if (!empty($matches['nl_paragraph'])) {
       
   221                         $this->_temp = array(
       
   222                             'tag'        => $matches['nl_paragraph'],
       
   223                             'name'       => 'p',
       
   224                             'type'       => Zend_Markup_Token::TYPE_TAG,
       
   225                             'attributes' => array()
       
   226                         );
       
   227 
       
   228                         $state = self::STATE_NEW_PARAGRAPH;
       
   229                     } elseif (!empty($matches['nl_break'])) {
       
   230                         $this->_tokens[] = array(
       
   231                             'tag'        => $matches['nl_break'],
       
   232                             'name'       => 'break',
       
   233                             'type'       => Zend_Markup_Token::TYPE_TAG,
       
   234                             'attributes' => array()
       
   235                         );
       
   236 
       
   237                         $state   = self::STATE_NEWLINE;
       
   238                     } elseif (!empty($matches['tag'])) {
       
   239                         if (isset($this->_simpleTags[$matches['name']])) {
       
   240                             // now add the new token
       
   241                             $this->_tokens[] = array(
       
   242                                 'tag'        => $matches['tag'],
       
   243                                 'type'       => Zend_Markup_Token::TYPE_TAG,
       
   244                                 'name'       => $this->_simpleTags[$matches['name']],
       
   245                                 'attributes' => $this->_extractAttributes($matches)
       
   246                             );
       
   247                         } else {
       
   248                             $attributes = $this->_extractAttributes($matches);
       
   249                             if ($matches['tag'][0] == '"') {
       
   250                                 $name = 'url';
       
   251                                 if (isset($matches['url'])) {
       
   252                                     $attributes['url'] = $matches['url'];
       
   253                                 }
       
   254                                 $this->_tokens[] = array(
       
   255                                     'tag'        => $matches['tag'],
       
   256                                     'type'       => Zend_Markup_Token::TYPE_TAG,
       
   257                                     'name'       => $name,
       
   258                                     'attributes' => $attributes
       
   259                                 );
       
   260                             } else {
       
   261                                 $name = 'acronym';
       
   262                                 $this->_tokens[] = array(
       
   263                                     'tag'        => '',
       
   264                                     'type'       => Zend_Markup_Token::TYPE_TAG,
       
   265                                     'name'       => 'acronym',
       
   266                                     'attributes' => array(
       
   267                                         'title' => $matches['title']
       
   268                                     )
       
   269                                 );
       
   270                                 $this->_tokens[] = array(
       
   271                                     'tag'  => $matches['acronym'],
       
   272                                     'type' => Zend_Markup_Token::TYPE_NONE
       
   273                                 );
       
   274                                 $this->_tokens[] = array(
       
   275                                     'tag'        => '(' . $matches['title'] . ')',
       
   276                                     'type'       => Zend_Markup_Token::TYPE_TAG,
       
   277                                     'name'       => 'acronym',
       
   278                                     'attributes' => array()
       
   279                                 );
       
   280                             }
       
   281                         }
       
   282                         $state = self::STATE_SCAN;
       
   283                     }
       
   284 
       
   285                     break;
       
   286                 case self::STATE_NEW_PARAGRAPH:
       
   287                     if (empty($this->_temp)) {
       
   288                         $this->_temp = array(
       
   289                             'tag'        => '',
       
   290                             'name'       => 'p',
       
   291                             'type'       => Zend_Markup_token::TYPE_TAG,
       
   292                             'attributes' => array()
       
   293                         );
       
   294                     } else {
       
   295                         $this->_tokens[] = array(
       
   296                             'tag'        => "\n",
       
   297                             'name'       => 'p',
       
   298                             'type'       => Zend_Markup_Token::TYPE_TAG,
       
   299                             'attributes' => array()
       
   300                         );
       
   301                         $this->_temp['tag'] = substr($this->_temp['tag'], 1);
       
   302                     }
       
   303 
       
   304                     $matches = array(); //[^\n*_?+~%@!-] (\()? [^()]+ (?(1)\))
       
   305                     $regex   = "#\G(?<name>(h[1-6]|p)|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#i";
       
   306                     if (!preg_match($regex, $this->_value, $matches, null, $this->_pointer)) {
       
   307                         $this->_tokens[] = $this->_temp;
       
   308                         $state    = self::STATE_SCAN;
       
   309                         break;
       
   310                     }
       
   311 
       
   312                     $this->_pointer += strlen($matches[0]);
       
   313 
       
   314                     if ($matches['name'] == 'p') {
       
   315                         $this->_temp['tag']       .= $matches[0];
       
   316                         $this->_temp['attributes'] = $this->_extractAttributes($matches);
       
   317 
       
   318                         $this->_tokens[]    = $this->_temp;
       
   319                         $this->_temp = array();
       
   320                     } else {
       
   321                         $this->_tokens[] = $this->_temp;
       
   322                         $this->_temp = array();
       
   323 
       
   324                         $name       = $matches['name'];
       
   325                         $attributes = $this->_extractAttributes($matches);
       
   326 
       
   327                         if ($name == '#') {
       
   328                             $name               = 'list';
       
   329                             $attributes['list'] = 'decimal';
       
   330                         } elseif ($name == '*') {
       
   331                             $name = 'list';
       
   332                         }
       
   333 
       
   334                         $this->_tokens[] = array(
       
   335                             'tag'        => $matches[0],
       
   336                             'name'       => $name,
       
   337                             'type'       => Zend_Markup_Token::TYPE_TAG,
       
   338                             'attributes' => $attributes
       
   339                         );
       
   340                     }
       
   341 
       
   342                     $state = self::STATE_SCAN;
       
   343                     break;
       
   344                 case self::STATE_NEWLINE:
       
   345                     $matches = array(); //[^\n*_?+~%@!-]
       
   346                     $regex   = "#\G(?<name>(h[1-6])|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#si";
       
   347                     if (!preg_match($regex, $this->_value, $matches, null, $this->_pointer)) {
       
   348                         $state = self::STATE_SCAN;
       
   349                         break;
       
   350                     }
       
   351 
       
   352                     $this->_pointer += strlen($matches[0]);
       
   353 
       
   354                     $name       = $matches['name'];
       
   355                     $attributes = $this->_extractAttributes($matches);
       
   356 
       
   357                     if ($name == '#') {
       
   358                         $name               = 'list';
       
   359                         $attributes['list'] = 'decimal';
       
   360                     } elseif ($name == '*') {
       
   361                         $name = 'list';
       
   362                     }
       
   363 
       
   364                     $this->_tokens[] = array(
       
   365                         'tag'        => $matches[0],
       
   366                         'name'       => $name,
       
   367                         'type'       => Zend_Markup_Token::TYPE_TAG,
       
   368                         'attributes' => $attributes
       
   369                     );
       
   370                     break;
       
   371             }
       
   372         }
       
   373     }
       
   374 
       
   375     /**
       
   376      * Create a tree from the tokenized text
       
   377      *
       
   378      * @return void
       
   379      */
       
   380     protected function _createTree()
       
   381     {
       
   382         $inside = true;
       
   383 
       
   384         foreach ($this->_tokens as $key => $token) {
       
   385             // first check if the token is a stopper
       
   386             if ($this->_isStopper($token, $this->_current)) {
       
   387                 if ($this->_current->getName() == 'li') {
       
   388                     // list items are handled differently
       
   389                     if (isset($this->_tokens[$key + 1])
       
   390                         && ($this->_tokens[$key + 1]['type'] == Zend_Markup_Token::TYPE_TAG)
       
   391                         && ($this->_tokens[$key + 1]['name'] == 'list')
       
   392                     ) {
       
   393                         // the next item is a correct tag
       
   394                         $this->_current->setStopper($token['tag']);
       
   395 
       
   396                         $this->_current = $this->_current->getParent();
       
   397                     } else {
       
   398                         // close the list
       
   399                         $this->_current->setStopper($token['tag']);
       
   400 
       
   401                         $this->_current = $this->_current->getParent()->getParent();
       
   402 
       
   403                         // go up in the tree until we found the end
       
   404                         while ($this->_isStopper($token, $this->_current)) {
       
   405                             $this->_current->setStopper($token['tag']);
       
   406 
       
   407                             $this->_current = $this->_current->getParent();
       
   408                         }
       
   409                     }
       
   410                 } else {
       
   411                     // go up in the tree until we found the end of stoppers
       
   412                     while ($this->_isStopper($token, $this->_current)) {
       
   413                         $this->_current->setStopper($token['tag']);
       
   414 
       
   415                         if (!empty($token['attributes'])) {
       
   416                             foreach ($token['attributes'] as $name => $value) {
       
   417                                 $this->_current->addAttribute($name, $value);
       
   418                             }
       
   419                         }
       
   420 
       
   421                         $this->_current = $this->_current->getParent();
       
   422                     }
       
   423                 }
       
   424                 $inside = true;
       
   425             } elseif (($token['type'] == Zend_Markup_Token::TYPE_TAG) && $inside) {
       
   426                 if ($token['name'] == 'break') {
       
   427                     // add the newline and continue parsing
       
   428                     $this->_current->addChild(new Zend_Markup_Token(
       
   429                         $token['tag'],
       
   430                         Zend_Markup_Token::TYPE_NONE,
       
   431                         '',
       
   432                         array(),
       
   433                         $this->_current
       
   434                     ));
       
   435                 } else {
       
   436                     // handle a list item
       
   437                     if ($token['name'] == 'list') {
       
   438                         $attributes = array();
       
   439                         if (isset($token['attributes']['list'])) {
       
   440                             $attributes['list'] = $token['attributes']['list'];
       
   441                             unset($token['attributes']['list']);
       
   442                         }
       
   443 
       
   444                         if ($this->_current->getName() != 'list') {
       
   445                             // the list isn't started yet, create it
       
   446                             $child = new Zend_Markup_Token(
       
   447                                 '',
       
   448                                 Zend_Markup_Token::TYPE_TAG,
       
   449                                 'list',
       
   450                                 $attributes,
       
   451                                 $this->_current
       
   452                             );
       
   453 
       
   454                             $this->_current->addChild($child);
       
   455 
       
   456                             $this->_current = $child;
       
   457                         }
       
   458                         $token['name'] = 'li';
       
   459                     } elseif (($token['name'] == 'img') || ($token['name'] == 'url')) {
       
   460                         $inside = false;
       
   461                     }
       
   462 
       
   463                     // add the token
       
   464                     $child = new Zend_Markup_Token(
       
   465                         $token['tag'],
       
   466                         Zend_Markup_Token::TYPE_TAG,
       
   467                         $token['name'],
       
   468                         $token['attributes'],
       
   469                         $this->_current
       
   470                     );
       
   471 
       
   472                     $this->_current->addChild($child);
       
   473 
       
   474                     $this->_current = $child;
       
   475                 }
       
   476             } else {
       
   477                 // simply add the token as text
       
   478                 $this->_current->addChild(new Zend_Markup_Token(
       
   479                     $token['tag'],
       
   480                     Zend_Markup_Token::TYPE_NONE,
       
   481                     '',
       
   482                     array(),
       
   483                     $this->_current
       
   484                 ));
       
   485             }
       
   486         }
       
   487     }
       
   488 
       
   489     /**
       
   490      * Check if a tag is a stopper
       
   491      *
       
   492      * @param array $token
       
   493      * @param Zend_Markup_Token $current
       
   494      *
       
   495      * @return bool
       
   496      */
       
   497     protected function _isStopper(array $token, Zend_Markup_Token $current)
       
   498     {
       
   499         switch ($current->getName()) {
       
   500             case 'h1':
       
   501             case 'h2':
       
   502             case 'h3':
       
   503             case 'h4':
       
   504             case 'h5':
       
   505             case 'h6':
       
   506             case 'list':
       
   507             case 'li':
       
   508                 if (($token['type'] == Zend_Markup_Token::TYPE_TAG)
       
   509                     && (($token['name'] == 'break') || ($token['name'] == 'p'))
       
   510                 ) {
       
   511                     return true;
       
   512                 }
       
   513                 break;
       
   514             case 'break':
       
   515                 return false;
       
   516                 break;
       
   517             default:
       
   518                 if (($token['type'] == Zend_Markup_Token::TYPE_TAG) && ($token['name'] == $current->getName())) {
       
   519                     return true;
       
   520                 }
       
   521                 break;
       
   522         }
       
   523         return false;
       
   524     }
       
   525 
       
   526     /**
       
   527      * Extract the attributes
       
   528      *
       
   529      * @param array $matches
       
   530      *
       
   531      * @return array
       
   532      */
       
   533     protected function _extractAttributes(array $matches)
       
   534     {
       
   535         $attributes = array();
       
   536 
       
   537         if (!empty($matches['attr_class'])) {
       
   538             $attributes['class'] = $matches['attr_class'];
       
   539         }
       
   540         if (!empty($matches['attr_id'])) {
       
   541             $attributes['id'] = $matches['attr_id'];
       
   542         }
       
   543         if (!empty($matches['attr_style'])) {
       
   544             $attributes['style'] = $matches['attr_style'];
       
   545         }
       
   546         if (!empty($matches['attr_lang'])) {
       
   547             $attributes['lang'] = $matches['attr_lang'];
       
   548         }
       
   549         if (!empty($matches['attr_align'])) {
       
   550             switch ($matches['attr_align']) {
       
   551                 case '=':
       
   552                     $attributes['align'] = 'center';
       
   553                     break;
       
   554                 case '>':
       
   555                     $attributes['align'] = 'right';
       
   556                     break;
       
   557                 case '<>':
       
   558                     $attributes['align'] = 'justify';
       
   559                     break;
       
   560                 default:
       
   561                 case '<':
       
   562                     $attributes['align'] = 'left';
       
   563                     break;
       
   564             }
       
   565         }
       
   566 
       
   567         return $attributes;
       
   568     }
       
   569 
       
   570 }