<?php
/**
 * Textile parser for Zend_Markup.
 *
 * Provenance: recovered from changeset 162c1de6545a (parent 5b37998e522e),
 * web/lib/Zend/Markup/Parser/Textile.php, Fri Mar 11 15:05:35 2011 +0100.
 *
 * NOTE(review): the text extraction that produced the recovered copy stripped
 * everything that looked like an HTML tag. That removed the original file
 * header, the class declaration, the STATE_* constants and the regex group
 * names. Those parts are reconstructed below from the identifiers the rest of
 * the class references -- confirm them against the upstream Zend Framework 1
 * file before relying on this copy.
 */

/**
 * @see Zend_Markup_TokenList
 */
require_once 'Zend/Markup/TokenList.php';

/**
 * @see Zend_Markup_Parser_ParserInterface
 */
require_once 'Zend/Markup/Parser/ParserInterface.php';

/**
 * Converts a Textile string into a Zend_Markup_TokenList tree.
 *
 * Parsing happens in two passes: {@link _tokenize()} turns the source string
 * into a flat list of token arrays, and {@link _createTree()} folds that list
 * into a tree of Zend_Markup_Token objects.
 *
 * NOTE(review): the "implements" clause is reconstructed -- confirm.
 */
class Zend_Markup_Parser_Textile implements Zend_Markup_Parser_ParserInterface
{

    /**
     * Tokenizer states.
     *
     * NOTE(review): only the names are referenced by the visible code; the
     * numeric values are reconstructed and merely need to be distinct.
     */
    const STATE_SCAN          = 0;
    const STATE_NEW_PARAGRAPH = 1;
    const STATE_NEWLINE       = 2;

    /**
     * Regex fragments for the attribute modifiers that may follow a tag.
     *
     * The group names are the keys read by {@link _extractAttributes()}.
     */
    const MATCH_ATTR_CLASSID = '\((?<attr_class>[a-zA-Z0-9_]+)?(?:\#(?<attr_id>[a-zA-Z0-9_]+))?\)';
    const MATCH_ATTR_STYLE   = "\{(?<attr_style>[^\}\n]+)\}";
    const MATCH_ATTR_LANG    = '\[(?<attr_lang>[a-zA-Z_]+)\]';
    const MATCH_ATTR_ALIGN   = '(?<attr_align>\<\>?|\>|=)';

    /**
     * Token tree
     *
     * @var Zend_Markup_TokenList
     */
    protected $_tree;

    /**
     * Current token
     *
     * @var Zend_Markup_Token
     */
    protected $_current;

    /**
     * Source to tokenize
     *
     * @var string
     */
    protected $_value = '';

    /**
     * Length of the value, in bytes (the same unit as {@link $_pointer})
     *
     * @var int
     */
    protected $_valueLen = 0;

    /**
     * Current pointer (byte offset into {@link $_value})
     *
     * @var int
     */
    protected $_pointer = 0;

    /**
     * The buffer
     *
     * @var string
     */
    protected $_buffer = '';

    /**
     * Paragraph token that has been opened but not yet pushed onto the
     * token array
     *
     * @var array
     */
    protected $_temp = array();

    /**
     * Simple tag translation
     *
     * @var array
     */
    protected $_simpleTags = array(
        '*'  => 'strong',
        '**' => 'bold',
        '_'  => 'emphasized',
        '__' => 'italic',
        '??' => 'citation',
        '-'  => 'deleted',
        '+'  => 'insert',
        '^'  => 'superscript',
        '~'  => 'subscript',
        '%'  => 'span',
        // these are a little more complicated
        '@'  => 'code',
        '!'  => 'img',
    );

    /**
     * Token array
     *
     * @var array
     */
    protected $_tokens = array();


    /**
     * Prepare the parsing of a Textile string, the real parsing is done in
     * {@link _tokenize()} and {@link _createTree()}
     *
     * @param  string $value
     *
     * @throws Zend_Markup_Parser_Exception When $value is not a string or is
     *                                      the empty string
     * @return Zend_Markup_TokenList
     */
    public function parse($value)
    {
        if (!is_string($value)) {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse should be a string.');
        }
        // strict comparison: empty() would also reject the valid input "0"
        if ('' === $value) {
            /**
             * @see Zend_Markup_Parser_Exception
             */
            require_once 'Zend/Markup/Parser/Exception.php';
            throw new Zend_Markup_Parser_Exception('Value to parse cannot be left empty.');
        }

        // first make sure we only have LF newlines, also trim the value
        $this->_value = str_replace(array("\r\n", "\r"), "\n", $value);
        $this->_value = trim($this->_value);

        // initialize variables and tokenize
        // The pointer is advanced with strlen() and handed to preg_match() as
        // an offset, both of which count bytes, so the length has to be a
        // byte count as well. A character count would cut multibyte input
        // short.
        $this->_valueLen = strlen($this->_value);
        $this->_pointer  = 0;
        $this->_buffer   = '';
        $this->_temp     = array();
        $this->_tokens   = array();

        $this->_tokenize();

        // create the tree
        $this->_tree     = new Zend_Markup_TokenList();

        $this->_current  = new Zend_Markup_Token('', Zend_Markup_Token::TYPE_NONE, 'Zend_Markup_Root');
        $this->_tree->addChild($this->_current);

        $this->_createTree();

        return $this->_tree;
    }

    /**
     * Tokenize a textile string
     *
     * Fills {@link $_tokens} with arrays holding the keys 'tag' and 'type',
     * plus 'name' and 'attributes' for tag tokens.
     *
     * @return void
     */
    protected function _tokenize()
    {
        $state    = self::STATE_NEW_PARAGRAPH;

        $attrsMatch = implode('|', array(
            self::MATCH_ATTR_CLASSID,
            self::MATCH_ATTR_STYLE,
            self::MATCH_ATTR_LANG,
            self::MATCH_ATTR_ALIGN
        ));

        while ($this->_pointer < $this->_valueLen) {
            switch ($state) {
                case self::STATE_SCAN:
                    $matches = array(); //[^\n*_?+~%@!-]
                    $acronym = '(?<acronym>[A-Z]{2,})\((?<title>[^\)]+)\)';
                    $regex   = '#\G(?<text>.*?)(?:'
                             . "(?:(?<nl_paragraph>\n{2,})|(?<nl_break>\n))|"
                             . '(?<tag>'
                             . "(?<name>\*{1,2}|_{1,2}|\?{2}|\-|\+|\~|\^|%|@|!|$|{$acronym}"
                             . '|":(?<url>[^\s]+)|")'
                             . "(?:{$attrsMatch})*)"
                             . ')#si';

                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        // The pattern can always match up to the end of the
                        // input, so a miss means PCRE gave up (for example on
                        // its backtrack limit). Emit the remainder as plain
                        // text instead of looping forever on a pointer that
                        // never moves.
                        $this->_buffer .= substr($this->_value, $this->_pointer);
                        $this->_pointer = $this->_valueLen;

                        if ($this->_buffer !== '') {
                            $this->_tokens[] = array(
                                'tag'  => $this->_buffer,
                                'type' => Zend_Markup_Token::TYPE_NONE
                            );
                            $this->_buffer = '';
                        }
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    // compare against '' rather than empty(): the text "0"
                    // must not be dropped
                    if ($matches['text'] !== '') {
                        $this->_buffer .= $matches['text'];
                    }

                    // first add the buffer
                    if ($this->_buffer !== '') {
                        $this->_tokens[] = array(
                            'tag'  => $this->_buffer,
                            'type' => Zend_Markup_Token::TYPE_NONE
                        );
                        $this->_buffer = '';
                    }

                    if (!empty($matches['nl_paragraph'])) {
                        $this->_temp = array(
                            'tag'        => $matches['nl_paragraph'],
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $state = self::STATE_NEW_PARAGRAPH;
                    } elseif (!empty($matches['nl_break'])) {
                        $this->_tokens[] = array(
                            'tag'        => $matches['nl_break'],
                            'name'       => 'break',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );

                        $state = self::STATE_NEWLINE;
                    } elseif (!empty($matches['tag'])) {
                        if (isset($this->_simpleTags[$matches['name']])) {
                            // now add the new token
                            $this->_tokens[] = array(
                                'tag'        => $matches['tag'],
                                'type'       => Zend_Markup_Token::TYPE_TAG,
                                'name'       => $this->_simpleTags[$matches['name']],
                                'attributes' => $this->_extractAttributes($matches)
                            );
                        } else {
                            $attributes = $this->_extractAttributes($matches);
                            if ($matches['tag'][0] == '"') {
                                // a double quote opens or closes a link
                                if (isset($matches['url'])) {
                                    $attributes['url'] = $matches['url'];
                                }
                                $this->_tokens[] = array(
                                    'tag'        => $matches['tag'],
                                    'type'       => Zend_Markup_Token::TYPE_TAG,
                                    'name'       => 'url',
                                    'attributes' => $attributes
                                );
                            } else {
                                // ABC(title): emit an opening tag, the
                                // acronym text and a closing tag
                                $this->_tokens[] = array(
                                    'tag'        => '',
                                    'type'       => Zend_Markup_Token::TYPE_TAG,
                                    'name'       => 'acronym',
                                    'attributes' => array(
                                        'title' => $matches['title']
                                    )
                                );
                                $this->_tokens[] = array(
                                    'tag'  => $matches['acronym'],
                                    'type' => Zend_Markup_Token::TYPE_NONE
                                );
                                $this->_tokens[] = array(
                                    'tag'        => '(' . $matches['title'] . ')',
                                    'type'       => Zend_Markup_Token::TYPE_TAG,
                                    'name'       => 'acronym',
                                    'attributes' => array()
                                );
                            }
                        }
                        $state = self::STATE_SCAN;
                    }

                    break;
                case self::STATE_NEW_PARAGRAPH:
                    if (empty($this->_temp)) {
                        // start of the input: open an implicit paragraph
                        $this->_temp = array(
                            'tag'        => '',
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );
                    } else {
                        // the first newline closes the previous paragraph,
                        // the rest of the newlines stay on the new one
                        $this->_tokens[] = array(
                            'tag'        => "\n",
                            'name'       => 'p',
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => array()
                        );
                        $this->_temp['tag'] = substr($this->_temp['tag'], 1);
                    }

                    $matches = array(); //[^\n*_?+~%@!-] (\()? [^()]+ (?(1)\))
                    $regex   = "#\G(?<name>(h[1-6]|p)|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#i";
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        $this->_tokens[] = $this->_temp;
                        $state    = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    if ($matches['name'] == 'p') {
                        // an explicit "p." only decorates the pending paragraph
                        $this->_temp['tag']       .= $matches[0];
                        $this->_temp['attributes'] = $this->_extractAttributes($matches);

                        $this->_tokens[]    = $this->_temp;
                        $this->_temp = array();
                    } else {
                        $this->_tokens[] = $this->_temp;
                        $this->_temp = array();

                        $name       = $matches['name'];
                        $attributes = $this->_extractAttributes($matches);

                        if ($name == '#') {
                            $name               = 'list';
                            $attributes['list'] = 'decimal';
                        } elseif ($name == '*') {
                            $name = 'list';
                        }

                        $this->_tokens[] = array(
                            'tag'        => $matches[0],
                            'name'       => $name,
                            'type'       => Zend_Markup_Token::TYPE_TAG,
                            'attributes' => $attributes
                        );
                    }

                    $state = self::STATE_SCAN;
                    break;
                case self::STATE_NEWLINE:
                    $matches = array(); //[^\n*_?+~%@!-]
                    $regex   = "#\G(?<name>(h[1-6])|(?:\#|\*))(?:{$attrsMatch})*(?(2)\.\s|\s)#si";
                    if (!preg_match($regex, $this->_value, $matches, 0, $this->_pointer)) {
                        $state = self::STATE_SCAN;
                        break;
                    }

                    $this->_pointer += strlen($matches[0]);

                    $name       = $matches['name'];
                    $attributes = $this->_extractAttributes($matches);

                    if ($name == '#') {
                        $name               = 'list';
                        $attributes['list'] = 'decimal';
                    } elseif ($name == '*') {
                        $name = 'list';
                    }

                    $this->_tokens[] = array(
                        'tag'        => $matches[0],
                        'name'       => $name,
                        'type'       => Zend_Markup_Token::TYPE_TAG,
                        'attributes' => $attributes
                    );
                    break;
            }
        }
    }

    /**
     * Create a tree from the tokenized text
     *
     * Walks {@link $_tokens} in order and attaches each one below
     * {@link $_current}, moving $_current down when a tag opens and back up
     * when a stopper for it is found.
     *
     * @return void
     */
    protected function _createTree()
    {
        // false while inside an img or url tag, where further tags are
        // treated as plain text until the stopper arrives
        $inside = true;

        foreach ($this->_tokens as $key => $token) {
            // first check if the token is a stopper
            if ($this->_isStopper($token, $this->_current)) {
                if ($this->_current->getName() == 'li') {
                    // list items are handled differently
                    if (isset($this->_tokens[$key + 1])
                        && ($this->_tokens[$key + 1]['type'] == Zend_Markup_Token::TYPE_TAG)
                        && ($this->_tokens[$key + 1]['name'] == 'list')
                    ) {
                        // the next item is a correct tag
                        $this->_current->setStopper($token['tag']);

                        $this->_current = $this->_current->getParent();
                    } else {
                        // close the list
                        $this->_current->setStopper($token['tag']);

                        $this->_current = $this->_current->getParent()->getParent();

                        // go up in the tree until we found the end
                        while ($this->_isStopper($token, $this->_current)) {
                            $this->_current->setStopper($token['tag']);

                            $this->_current = $this->_current->getParent();
                        }
                    }
                } else {
                    // go up in the tree until we found the end of stoppers
                    while ($this->_isStopper($token, $this->_current)) {
                        $this->_current->setStopper($token['tag']);

                        // attributes given on the closing tag (such as the
                        // url of a link) belong to the tag being closed
                        if (!empty($token['attributes'])) {
                            foreach ($token['attributes'] as $name => $value) {
                                $this->_current->addAttribute($name, $value);
                            }
                        }

                        $this->_current = $this->_current->getParent();
                    }
                }
                $inside = true;
            } elseif (($token['type'] == Zend_Markup_Token::TYPE_TAG) && $inside) {
                if ($token['name'] == 'break') {
                    // add the newline and continue parsing
                    $this->_current->addChild(new Zend_Markup_Token(
                        $token['tag'],
                        Zend_Markup_Token::TYPE_NONE,
                        '',
                        array(),
                        $this->_current
                    ));
                } else {
                    // handle a list item
                    if ($token['name'] == 'list') {
                        $attributes = array();
                        if (isset($token['attributes']['list'])) {
                            $attributes['list'] = $token['attributes']['list'];
                            unset($token['attributes']['list']);
                        }

                        if ($this->_current->getName() != 'list') {
                            // the list isn't started yet, create it
                            $child = new Zend_Markup_Token(
                                '',
                                Zend_Markup_Token::TYPE_TAG,
                                'list',
                                $attributes,
                                $this->_current
                            );

                            $this->_current->addChild($child);

                            $this->_current = $child;
                        }
                        $token['name'] = 'li';
                    } elseif (($token['name'] == 'img') || ($token['name'] == 'url')) {
                        $inside = false;
                    }

                    // add the token
                    $child = new Zend_Markup_Token(
                        $token['tag'],
                        Zend_Markup_Token::TYPE_TAG,
                        $token['name'],
                        $token['attributes'],
                        $this->_current
                    );

                    $this->_current->addChild($child);

                    $this->_current = $child;
                }
            } else {
                // simply add the token as text
                $this->_current->addChild(new Zend_Markup_Token(
                    $token['tag'],
                    Zend_Markup_Token::TYPE_NONE,
                    '',
                    array(),
                    $this->_current
                ));
            }
        }
    }

    /**
     * Check if a tag is a stopper
     *
     * Headings, lists and list items are closed by a line break or a new
     * paragraph; a 'break' is never closed; every other tag is closed by a
     * tag token carrying the same name.
     *
     * @param array             $token
     * @param Zend_Markup_Token $current
     *
     * @return bool
     */
    protected function _isStopper(array $token, Zend_Markup_Token $current)
    {
        switch ($current->getName()) {
            case 'h1':
            case 'h2':
            case 'h3':
            case 'h4':
            case 'h5':
            case 'h6':
            case 'list':
            case 'li':
                if (($token['type'] == Zend_Markup_Token::TYPE_TAG)
                    && (($token['name'] == 'break') || ($token['name'] == 'p'))
                ) {
                    return true;
                }
                break;
            case 'break':
                return false;
                break;
            default:
                if (($token['type'] == Zend_Markup_Token::TYPE_TAG) && ($token['name'] == $current->getName())) {
                    return true;
                }
                break;
        }
        return false;
    }

    /**
     * Extract the attributes
     *
     * Translates the attr_* named groups of a regex match into an attribute
     * array with the keys class, id, style, lang and align. Groups that did
     * not take part in the match are left out.
     *
     * @param array $matches
     *
     * @return array
     */
    protected function _extractAttributes(array $matches)
    {
        $attributes = array();

        if (!empty($matches['attr_class'])) {
            $attributes['class'] = $matches['attr_class'];
        }
        if (!empty($matches['attr_id'])) {
            $attributes['id'] = $matches['attr_id'];
        }
        if (!empty($matches['attr_style'])) {
            $attributes['style'] = $matches['attr_style'];
        }
        if (!empty($matches['attr_lang'])) {
            $attributes['lang'] = $matches['attr_lang'];
        }
        if (!empty($matches['attr_align'])) {
            switch ($matches['attr_align']) {
                case '=':
                    $attributes['align'] = 'center';
                    break;
                case '>':
                    $attributes['align'] = 'right';
                    break;
                case '<>':
                    $attributes['align'] = 'justify';
                    break;
                default:
                case '<':
                    $attributes['align'] = 'left';
                    break;
            }
        }

        return $attributes;
    }

}