// NOTE: this text was extracted from a changeset diff. The start of the file (everything before
// the end of the first method) is missing from the extract, so the surviving residue is kept
// verbatim in the two comment lines below instead of being reconstructed.
// diff -r bd595ad770fc -r 1c2f13fd785c web/enmi/Zend/Pdf/StringParser.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/enmi/Zend/Pdf/StringParser.php Thu Jan 20 19:30:54 2011 +0100 @@ -0,0 +1,731 @@
// +_context = null; + $this->_elements = array(); + $this->_objFactory = null; + }

    /**
     * Character with code $chCode is white space
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isWhiteSpace($chCode)
    {
        return $chCode == 0x00    // null character
            || $chCode == 0x09    // Tab
            || $chCode == 0x0A    // Line feed
            || $chCode == 0x0C    // Form Feed
            || $chCode == 0x0D    // Carriage return
            || $chCode == 0x20;   // Space
    }


    /**
     * Character with code $chCode is a delimiter character
     *
     * @param integer $chCode
     * @return boolean
     */
    public static function isDelimiter($chCode)
    {
        return $chCode == 0x28    // '('
            || $chCode == 0x29    // ')'
            || $chCode == 0x3C    // '<'
            || $chCode == 0x3E    // '>'
            || $chCode == 0x5B    // '['
            || $chCode == 0x5D    // ']'
            || $chCode == 0x7B    // '{'
            || $chCode == 0x7D    // '}'
            || $chCode == 0x2F    // '/'
            || $chCode == 0x25;   // '%'
    }


    /**
     * Skip white space
     *
     * Advances $this->offset past any run of white space characters. If $skipComment is true,
     * '%' comments (up to, but not including, the next CR or LF) are skipped as well.
     *
     * strspn()/strcspn() are used instead of a character-by-character loop as an optimization.
     *
     * @param boolean $skipComment
     */
    public function skipWhiteSpace($skipComment = true)
    {
        $length = strlen($this->data);

        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if (!$skipComment
                || $this->offset >= $length
                || $this->data[$this->offset] != '%'
            ) {
                // End of data, or a non white space character which is not a skippable comment
                return;
            }

            // Skip comment body; the line terminator is consumed as white space on the next pass
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }


    /**
     * Skip comment
     *
     * Advances $this->offset up to (but not past) the next line feed or carriage return,
     * or to the end of data if there is none.
     */
    public function skipComment()
    {
        // The previous implementation tested "!= LF || != CR", which is always true and therefore
        // always ran to the end of data. Stop at the first line terminator instead.
        if ($this->offset < strlen($this->data)) {
            $this->offset += strcspn($this->data, "\r\n", $this->offset);
        }
    }


    /**
     * Read comment line
     *
     * Skips leading white space and, if the next character is '%', consumes the comment up to
     * (but not including) the line terminator.
     *
     * @return string  Comment text including the leading '%', or '' if there is no comment here
     */
    public function readComment()
    {
        $this->skipWhiteSpace(false);

        /** Check if it's a comment line (and that we are still inside the data) */
        if ($this->offset >= strlen($this->data) || $this->data[$this->offset] != '%') {
            return '';
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, "\r\n", $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Returns next lexeme from a pdf stream
     *
     * White space and comments before the lexeme are skipped. A lexeme is either '<<', '>>',
     * a single delimiter character, or a run of characters up to the next delimiter/white space.
     *
     * @return string  Lexeme, or '' if the end of data has been reached
     */
    public function readLexeme()
    {
        // Terminator mask for regular lexemes. It depends only on the PHP version, so it is
        // computed once. "\f" is left out for PHP versions older than 5.2.5.
        static $lexemeTerminators = null;
        if ($lexemeTerminators === null) {
            if (version_compare(phpversion(), '5.2.5') >= 0) {
                $lexemeTerminators = "()<>[]{}/%\x00\t\n\f\r ";
            } else {
                $lexemeTerminators = "()<>[]{}/%\x00\t\n\r ";
            }
        }

        $length = strlen($this->data);

        // Inlined equivalent of $this->skipWhiteSpace()
        while (true) {
            $this->offset += strspn($this->data, "\x00\t\n\f\r ", $this->offset);

            if ($this->offset < $length && $this->data[$this->offset] == '%') {
                $this->offset += strcspn($this->data, "\r\n", $this->offset);
            } else {
                break;
            }
        }

        if ($this->offset >= $length) {
            return '';
        }

        if (strpos('()<>[]{}/%', $this->data[$this->offset]) !== false) {
            // Delimiter: '<<' and '>>' are two-character lexemes, everything else is one character
            $pair = substr($this->data, $this->offset, 2);
            if ($pair === '<<' || $pair === '>>') {
                $this->offset += 2;
                return $pair;
            }

            return $this->data[$this->offset++];
        }

        $start = $this->offset;
        $this->offset += strcspn($this->data, $lexemeTerminators, $this->offset);

        return substr($this->data, $start, $this->offset - $start);
    }


    /**
     * Read elemental object from a PDF stream
     *
     * Every element created here is also appended to $this->_elements.
     *
     * @param string|null $nextLexeme  Already read lexeme to start from; read from the data if null
     * @return Zend_Pdf_Element
     * @throws Zend_Pdf_Exception
     */
    public function readElement($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        /**
         * Note: readElement() method is a public method and could be invoked from other classes.
         * If readElement() is used not by Zend_Pdf_StringParser::getObject() method, then we should not care
         * about _elements member management.
         */
        switch ($nextLexeme) {
            case '(':
                return ($this->_elements[] = $this->_readString());

            case '<':
                return ($this->_elements[] = $this->_readBinaryString());

            case '/':
                return ($this->_elements[] = new Zend_Pdf_Element_Name(
                                                Zend_Pdf_Element_Name::unescape( $this->readLexeme() )
                                                                      ));

            case '[':
                return ($this->_elements[] = $this->_readArray());

            case '<<':
                return ($this->_elements[] = $this->_readDictionary());

            // Closing delimiters and braces can not start an element
            case ')':
            case '>':
            case ']':
            case '>>':
            case '{':
            case '}':
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
                                                $this->offset));

            default:
                if (strcasecmp($nextLexeme, 'true') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(true));
                } else if (strcasecmp($nextLexeme, 'false') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Boolean(false));
                } else if (strcasecmp($nextLexeme, 'null') == 0) {
                    return ($this->_elements[] = new Zend_Pdf_Element_Null());
                }

                // "<digits> <digits> R" is a reference; _readReference() rewinds if it is not one
                $ref = $this->_readReference($nextLexeme);
                if ($ref !== null) {
                    return ($this->_elements[] = $ref);
                }

                return ($this->_elements[] = $this->_readNumeric($nextLexeme));
        }
    }


    /**
     * Read string PDF object
     * Also reads trailing ')' from a pdf stream
     *
     * The opening '(' must already have been consumed. Nested parentheses have to be balanced,
     * and a backslash escapes the character which follows it.
     *
     * @return Zend_Pdf_Element_String
     * @throws Zend_Pdf_Exception
     */
    private function _readString()
    {
        $start          = $this->offset;
        $length         = strlen($this->data);
        $openedBrackets = 1;

        $this->offset += strcspn($this->data, '()\\', $this->offset);

        while ($this->offset < $length) {
            switch (ord( $this->data[$this->offset] )) {
                case 0x28:  // '(' - opened bracket in the string, needs balanced pair.
                    $this->offset++;
                    $openedBrackets++;
                    break;

                case 0x29:  // ')' - pair to the opened bracket
                    $this->offset++;
                    $openedBrackets--;
                    break;

                case 0x5C:  // '\\' - escape sequence, skip next char from a check
                    $this->offset += 2;
                    break;
            }

            if ($openedBrackets == 0) {
                break; // end of string
            }

            // A trailing backslash may have moved the offset past the end of data;
            // never hand such an offset to strcspn()
            if ($this->offset < $length) {
                $this->offset += strcspn($this->data, '()\\', $this->offset);
            }
        }

        if ($openedBrackets != 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
        }

        // "- 1" excludes the closing ')' which has already been consumed
        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape( substr($this->data,
                                                                                     $start,
                                                                                     $this->offset - $start - 1) ));
    }


    /**
     * Read binary string PDF object
     * Also reads trailing '>' from a pdf stream
     *
     * The opening '<' must already have been consumed.
     *
     * @return Zend_Pdf_Element_String_Binary
     * @throws Zend_Pdf_Exception
     */
    private function _readBinaryString()
    {
        $start = $this->offset;

        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);

        // End of data is reached only if no character is left at all. (The former
        // "strlen - 1" bound rejected a string whose closing '>' is the last byte of data.)
        if ($this->offset >= strlen($this->data)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
        }

        if ($this->data[$this->offset++] != '>') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
        }

        // "- 1" excludes the closing '>' which has already been consumed
        return new Zend_Pdf_Element_String_Binary(
                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
                                                                        $start,
                                                                        $this->offset - $start - 1) ));
    }


    /**
     * Read array PDF object
     * Also reads trailing ']' from a pdf stream
     *
     * The opening '[' must already have been consumed.
     *
     * @return Zend_Pdf_Element_Array
     * @throws Zend_Pdf_Exception
     */
    private function _readArray()
    {
        $elements = array();

        // An empty lexeme means the end of data has been reached
        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == ']') {
                return new Zend_Pdf_Element_Array($elements);
            }

            $elements[] = $this->readElement($nextLexeme);
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
    }


    /**
     * Read dictionary PDF object
     * Also reads trailing '>>' from a pdf stream
     *
     * The opening '<<' must already have been consumed. Entries are read as name/value pairs.
     *
     * @return Zend_Pdf_Element_Dictionary
     * @throws Zend_Pdf_Exception
     */
    private function _readDictionary()
    {
        $dictionary = new Zend_Pdf_Element_Dictionary();

        // An empty lexeme means the end of data has been reached
        while ( strlen($nextLexeme = $this->readLexeme()) != 0 ) {
            if ($nextLexeme == '>>') {
                return $dictionary;
            }

            // Remember where the key started for error reporting
            $nameStart = $this->offset - strlen($nextLexeme);

            $name  = $this->readElement($nextLexeme);
            $value = $this->readElement();

            if (!$name instanceof Zend_Pdf_Element_Name) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $nameStart));
            }

            $dictionary->add($name, $value);
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
    }


    /**
     * Read reference PDF object
     *
     * A reference has the form "<object number> <generation number> R". If the lexemes do not
     * match this form, the offset is restored to its value on entry and null is returned.
     *
     * @param string $nextLexeme  Already read first lexeme; read from the data if null
     * @return Zend_Pdf_Element_Reference|null
     */
    private function _readReference($nextLexeme = null)
    {
        $start = $this->offset;

        if ($nextLexeme === null) {
            $objNum = $this->readLexeme();
        } else {
            $objNum = $nextLexeme;
        }
        if (!ctype_digit($objNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) { // it's not a reference
            $this->offset = $start;
            return null;
        }

        $rMark  = $this->readLexeme();
        if ($rMark != 'R') { // it's not a reference
            $this->offset = $start;
            return null;
        }

        return new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
    }


    /**
     * Read numeric PDF object
     *
     * @param string $nextLexeme  Already read lexeme; read from the data if null
     * @return Zend_Pdf_Element_Numeric
     */
    private function _readNumeric($nextLexeme = null)
    {
        if ($nextLexeme === null) {
            $nextLexeme = $this->readLexeme();
        }

        return new Zend_Pdf_Element_Numeric($nextLexeme);
    }


    /**
     * Read indirect object from a PDF stream
     *
     * Parses "<object number> <generation number> obj <value> endobj", or the stream form
     * "... obj <dictionary> stream <data> endstream endobj", starting at $offset.
     * On success the parser offset is restored to the value it had before the call.
     *
     * @param integer $offset
     * @param Zend_Pdf_Element_Reference_Context $context
     * @return Zend_Pdf_Element_Object
     * @throws Zend_Pdf_Exception
     */
    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
    {
        if ($offset === null ) {
            return new Zend_Pdf_Element_Null();
        }

        // Save current offset to make getObject() reentrant
        $offsetSave = $this->offset;

        $this->offset    = $offset;
        $this->_context  = $context;
        $this->_elements = array();

        $objNum = $this->readLexeme();
        if (!ctype_digit($objNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
        }

        $genNum = $this->readLexeme();
        if (!ctype_digit($genNum)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
        }

        $objKeyword = $this->readLexeme();
        if ($objKeyword != 'obj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
        }

        $objValue = $this->readElement();

        $nextLexeme = $this->readLexeme();

        if( $nextLexeme == 'endobj' ) {
            /**
             * Object is not generated by factory (thus it's not marked as modified object).
             * But factory is assigned to the object.
             */
            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());

            foreach ($this->_elements as $element) {
                $element->setParentObject($obj);
            }

            // Restore offset value
            $this->offset = $offsetSave;

            return $obj;
        }

        /**
         * It's a stream object
         */
        if ($nextLexeme != 'stream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
        }

        if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
        }

        /**
         * References are automatically dereferenced at this moment.
         */
        $streamLength = $objValue->Length->value;

        /**
         * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
         * This restriction gives the possibility to recognize all cases exactly
         *
         * The two bytes are fetched with bounds checks, so truncated data is reported through
         * the exception below rather than by reading past the end of the string.
         */
        $firstChar  = isset($this->data[$this->offset])     ? $this->data[$this->offset]     : '';
        $secondChar = isset($this->data[$this->offset + 1]) ? $this->data[$this->offset + 1] : '';

        if ($firstChar == "\r" && $secondChar == "\n") {
            $this->offset += 2;
        } else if ($firstChar == "\n") {
            $this->offset++;
        } else {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
        }

        $dataOffset = $this->offset;

        // Jump over the stream body using the length taken from the stream dictionary
        $this->offset += $streamLength;

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endstream') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $nextLexeme = $this->readLexeme();
        if ($nextLexeme != 'endobj') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
        }

        $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
                                                         $dataOffset,
                                                         $streamLength),
                                                  (int)$objNum,
                                                  (int)$genNum,
                                                  $this->_objFactory->resolve(),
                                                  $objValue);

        foreach ($this->_elements as $element) {
            $element->setParentObject($obj);
        }

        // Restore offset value
        $this->offset = $offsetSave;

        return $obj;
    }


    /**
     * Get length of source string
     *
     * @return integer
     */
    public function getLength()
    {
        return strlen($this->data);
    }

    /**
     * Get source string
     *
     * @return string
     */
    public function getString()
    {
        return $this->data;
    }


    /**
     * Parse integer value from a binary stream
     *
     * Reads $size bytes starting at $offset, most significant byte first.
     *
     * @param string  $stream
     * @param integer $offset
     * @param integer $size
     * @return integer
     */
    public static function parseIntFromStream($stream, $offset, $size)
    {
        $value = 0;
        for ($count = 0; $count < $size; $count++) {
            $value *= 256;
            $value += ord($stream[$offset + $count]);
        }

        return $value;
    }



    /**
     * Set current context
     *
     * @param Zend_Pdf_Element_Reference_Context $context
     */
    public function setContext(Zend_Pdf_Element_Reference_Context $context)
    {
        $this->_context = $context;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param string $source
     * @param Zend_Pdf_ElementFactory_Interface $factory
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
    {
        $this->data        = $source;
        $this->_objFactory = $factory;
    }
}