web/enmi/Zend/Pdf/StringParser.php
changeset 19 1c2f13fd785c
parent 0 4eba9c11703f
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/enmi/Zend/Pdf/StringParser.php	Thu Jan 20 19:30:54 2011 +0100
@@ -0,0 +1,731 @@
+<?php
+/**
+ * Zend Framework
+ *
+ * LICENSE
+ *
+ * This source file is subject to the new BSD license that is bundled
+ * with this package in the file LICENSE.txt.
+ * It is also available through the world-wide-web at this URL:
+ * http://framework.zend.com/license/new-bsd
+ * If you did not receive a copy of the license and are unable to
+ * obtain it through the world-wide-web, please send an email
+ * to license@zend.com so we can send you a copy immediately.
+ *
+ * @category   Zend
+ * @package    Zend_Pdf
+ * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ * @version    $Id: StringParser.php 22311 2010-05-27 12:57:37Z padraic $
+ */
+
+
+/** Internally used classes */
+require_once 'Zend/Pdf/Element/Array.php';
+require_once 'Zend/Pdf/Element/String/Binary.php';
+require_once 'Zend/Pdf/Element/Boolean.php';
+require_once 'Zend/Pdf/Element/Dictionary.php';
+require_once 'Zend/Pdf/Element/Name.php';
+require_once 'Zend/Pdf/Element/Null.php';
+require_once 'Zend/Pdf/Element/Numeric.php';
+require_once 'Zend/Pdf/Element/Object.php';
+require_once 'Zend/Pdf/Element/Object/Stream.php';
+require_once 'Zend/Pdf/Element/Reference.php';
+require_once 'Zend/Pdf/Element/String.php';
+
+
+/**
+ * PDF string parser
+ *
+ * @package    Zend_Pdf
+ * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
+ * @license    http://framework.zend.com/license/new-bsd     New BSD License
+ */
+class Zend_Pdf_StringParser
+{
+    /**
+     * Source PDF
+     *
+     * @var string
+     */
+    public $data = '';
+
+    /**
+     * Current position (byte offset) within the source data
+     *
+     * @var integer
+     */
+    public $offset = 0;
+
+    /**
+     * Current reference context
+     *
+     * @var Zend_Pdf_Element_Reference_Context
+     */
+    private $_context = null;
+
+    /**
+     * Array of elements of the currently parsed object/trailer
+     *
+     * @var array
+     */
+    private $_elements = array();
+
+    /**
+     * PDF objects factory.
+     *
+     * @var Zend_Pdf_ElementFactory_Interface
+     */
+    private $_objFactory = null;
+
+
+    /**
+     * Release everything the parser holds on to.
+     *
+     * Resets the reference context, the list of collected elements and the
+     * object factory, so that the parser no longer takes part in cyclic
+     * object references.
+     */
+    public function cleanUp()
+    {
+        $this->_objFactory = null;
+        $this->_elements   = array();
+        $this->_context    = null;
+    }
+
+    /**
+     * Tell whether a character code denotes a PDF white-space character.
+     *
+     * @param integer $chCode  character code (as returned by ord())
+     * @return boolean  true for NUL, tab, line feed, form feed, carriage return and space
+     */
+    public static function isWhiteSpace($chCode)
+    {
+        switch ($chCode) {
+            case 0x00: // null character
+            case 0x09: // Tab
+            case 0x0A: // Line feed
+            case 0x0C: // Form Feed
+            case 0x0D: // Carriage return
+            case 0x20: // Space
+                return true;
+        }
+
+        return false;
+    }
+
+
+    /**
+     * Tell whether a character code denotes a PDF delimiter character.
+     *
+     * @param integer $chCode  character code (as returned by ord())
+     * @return boolean  true for ( ) < > [ ] { } / and %
+     */
+    public static function isDelimiter($chCode )
+    {
+        // Non-strict lookup on purpose: it compares with the same loose rules as '=='.
+        return in_array($chCode, array(
+            0x28, // '('
+            0x29, // ')'
+            0x3C, // '<'
+            0x3E, // '>'
+            0x5B, // '['
+            0x5D, // ']'
+            0x7B, // '{'
+            0x7D, // '}'
+            0x2F, // '/'
+            0x25  // '%'
+        ));
+    }
+
+
+    /**
+     * Advance the current offset past white space.
+     *
+     * When $skipComment is true, comments (from '%' up to the next CR or LF)
+     * are skipped as well, together with the white space that follows them.
+     *
+     * @param boolean $skipComment
+     */
+    public function skipWhiteSpace($skipComment = true)
+    {
+        $whiteSpace = "\x00\t\n\f\r ";
+
+        $this->offset += strspn($this->data, $whiteSpace, $this->offset);
+
+        if (!$skipComment) {
+            return;
+        }
+
+        // Each pass consumes one comment and the white space that follows it;
+        // the loop ends on the first character that is neither, or at the end of data.
+        while ($this->offset < strlen($this->data)  &&  $this->data[$this->offset] == '%') {
+            $this->offset += strcspn($this->data, "\r\n", $this->offset);
+            $this->offset += strspn($this->data, $whiteSpace, $this->offset);
+        }
+    }
+
+
+    /**
+     * Skip comment
+     *
+     * Advances the current offset up to (but not past) the next line feed or
+     * carriage return, or to the end of the data when no line break follows.
+     *
+     * The previous implementation tested "not LF OR not CR", which holds for
+     * every character, so it always ran to the end of the data.
+     */
+    public function skipComment()
+    {
+        if ($this->offset < strlen($this->data)) {
+            $this->offset += strcspn($this->data, "\r\n", $this->offset);
+        }
+    }
+
+
+    /**
+     * Read comment line
+     *
+     * Skips leading white space (comments are not skipped), then, if the
+     * current character is '%', consumes everything up to the next line feed
+     * or carriage return (or the end of the data).
+     *
+     * @return string  the comment including the leading '%' but without the
+     *                 line break; an empty string when the current position
+     *                 is not a comment or the data is exhausted
+     */
+    public function readComment()
+    {
+        $this->skipWhiteSpace(false);
+
+        /** Check if it's a comment line (the data may end right after the white space) */
+        if ($this->offset >= strlen($this->data)  ||  $this->data[$this->offset] != '%') {
+            return '';
+        }
+
+        $start = $this->offset;
+        $this->offset += strcspn($this->data, "\r\n", $this->offset);
+
+        return substr($this->data, $start, $this->offset - $start);
+    }
+
+
+    /**
+     * Returns next lexeme from a pdf stream
+     *
+     * Leading white space and comments are skipped. A lexeme is one of:
+     *  - '<<' or '>>';
+     *  - a single delimiter character: ( ) < > [ ] { } /
+     *  - a run of characters up to the next delimiter or white-space character.
+     *
+     * @return string  the lexeme, or an empty string when the end of data is reached
+     */
+    public function readLexeme()
+    {
+        // Form feed is written as "\x0C" rather than "\f": the "\f" escape is
+        // only understood by PHP >= 5.2.5, whereas the hex escape always works.
+        // That makes a per-call version check unnecessary.
+        $whiteSpace = "\x00\t\n\x0C\r ";
+        $delimiters = '()<>[]{}/%';
+        $dataLength = strlen($this->data);
+
+        // Inlined equivalent of $this->skipWhiteSpace()
+        while (true) {
+            $this->offset += strspn($this->data, $whiteSpace, $this->offset);
+
+            if ($this->offset < $dataLength  &&  $this->data[$this->offset] == '%') {
+                // Comment: skip to the end of the line
+                $this->offset += strcspn($this->data, "\r\n", $this->offset);
+            } else {
+                break;
+            }
+        }
+
+        if ($this->offset >= $dataLength) {
+            return '';
+        }
+
+        if (strpos($delimiters, $this->data[$this->offset]) !== false) {
+            // Dictionary brackets are the only two-character delimiters
+            $pair = substr($this->data, $this->offset, 2);
+            if ($pair === '<<'  ||  $pair === '>>') {
+                $this->offset += 2;
+                return $pair;
+            }
+
+            return $this->data[$this->offset++];
+        }
+
+        // Regular lexeme: everything up to the next delimiter or white space
+        $start = $this->offset;
+        $this->offset += strcspn($this->data, $delimiters . $whiteSpace, $this->offset);
+
+        return substr($this->data, $start, $this->offset - $start);
+    }
+
+
+    /**
+     * Read elemental object from a PDF stream
+     *
+     * The kind of element is decided by the lexeme that introduces it:
+     * '(' string, '<' binary string, '/' name, '[' array, '<<' dictionary;
+     * any other non-delimiter lexeme is tried as true/false/null, then as an
+     * indirect reference and finally as a numeric value.
+     *
+     * Every element that is created is also appended to the internal list of
+     * elements of the object currently being parsed. readElement() is public
+     * and may be called from other classes; when it is not driven by
+     * Zend_Pdf_StringParser::getObject(), that list is simply not relevant.
+     *
+     * @param string|null $nextLexeme  lexeme already read by the caller, or
+     *                                 null to read the next one from the data
+     * @return Zend_Pdf_Element
+     * @throws Zend_Pdf_Exception  when a closing delimiter or a brace is found
+     *                             where an element is expected
+     */
+    public function readElement($nextLexeme = null)
+    {
+        if ($nextLexeme === null) {
+            $nextLexeme = $this->readLexeme();
+        }
+
+        if ($nextLexeme == '(') {
+            $element = $this->_readString();
+        } else if ($nextLexeme == '<') {
+            $element = $this->_readBinaryString();
+        } else if ($nextLexeme == '/') {
+            $nameLexeme = $this->readLexeme();
+            $element    = new Zend_Pdf_Element_Name(Zend_Pdf_Element_Name::unescape($nameLexeme));
+        } else if ($nextLexeme == '[') {
+            $element = $this->_readArray();
+        } else if ($nextLexeme == '<<') {
+            $element = $this->_readDictionary();
+        } else if (in_array($nextLexeme, array(')', '>', ']', '>>', '{', '}'))) {
+            // None of these lexemes can start an element
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.',
+                                            $this->offset));
+        } else if (strcasecmp($nextLexeme, 'true') == 0) {
+            $element = new Zend_Pdf_Element_Boolean(true);
+        } else if (strcasecmp($nextLexeme, 'false') == 0) {
+            $element = new Zend_Pdf_Element_Boolean(false);
+        } else if (strcasecmp($nextLexeme, 'null') == 0) {
+            $element = new Zend_Pdf_Element_Null();
+        } else {
+            // Try "<num> <gen> R" first; fall back to a plain number
+            $element = $this->_readReference($nextLexeme);
+            if ($element === null) {
+                $element = $this->_readNumeric($nextLexeme);
+            }
+        }
+
+        $this->_elements[] = $element;
+
+        return $element;
+    }
+
+
+    /**
+     * Read string PDF object
+     * Also reads trailing ')' from a pdf stream
+     *
+     * Expects the current offset to be just behind the opening '('.
+     * Unescaped parentheses inside the string must be balanced; a backslash
+     * hides the character that follows it from the balance check.
+     *
+     * @return Zend_Pdf_Element_String
+     * @throws Zend_Pdf_Exception  when the data ends before the string is closed
+     */
+    private function _readString()
+    {
+        $start      = $this->offset;
+        $depth      = 1;               // the opening '(' has already been consumed
+        $dataLength = strlen($this->data);
+
+        // Jump straight to the first character that matters
+        $this->offset += strcspn($this->data, '()\\', $this->offset);
+
+        while ($this->offset < $dataLength) {
+            $char = $this->data[$this->offset];
+
+            if ($char === '(') {
+                // nested opening bracket, needs a balancing pair
+                $this->offset++;
+                $depth++;
+            } else if ($char === ')') {
+                // closes the innermost open bracket
+                $this->offset++;
+                $depth--;
+            } else if ($char === '\\') {
+                // escape sequence, the escaped character is not inspected
+                $this->offset += 2;
+            }
+
+            if ($depth == 0) {
+                break; // end of string
+            }
+
+            $this->offset += strcspn($this->data, '()\\', $this->offset);
+        }
+
+        if ($depth != 0) {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while string reading. Offset - 0x%X. \')\' expected.', $start));
+        }
+
+        // The closing ')' is consumed but is not part of the value
+        $raw = substr($this->data, $start, $this->offset - $start - 1);
+
+        return new Zend_Pdf_Element_String(Zend_Pdf_Element_String::unescape($raw));
+    }
+
+
+    /**
+     * Read binary string PDF object
+     * Also reads trailing '>' from a pdf stream
+     *
+     * Expects the current offset to be just behind the opening '<'. Accepts
+     * hexadecimal digits and white space up to the closing '>'.
+     *
+     * @return Zend_Pdf_Element_String_Binary
+     * @throws Zend_Pdf_Exception  when the data ends before '>' is found, or when
+     *                             a character other than a hex digit, white space
+     *                             or '>' is encountered
+     */
+    private function _readBinaryString()
+    {
+        $start = $this->offset;
+
+        $this->offset += strspn($this->data, "\x00\t\n\f\r 0123456789abcdefABCDEF", $this->offset);
+
+        // The data is exhausted only once the offset has moved past the last
+        // byte. A closing '>' that is the very last byte is still valid, so the
+        // comparison must not stop one byte early.
+        if ($this->offset >= strlen($this->data)) {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while reading binary string. Offset - 0x%X. \'>\' expected.', $start));
+        }
+
+        if ($this->data[$this->offset++] != '>') {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected character while binary string reading. Offset - 0x%X.', $this->offset));
+        }
+
+        // The closing '>' is consumed but is not part of the value
+        return new Zend_Pdf_Element_String_Binary(
+                       Zend_Pdf_Element_String_Binary::unescape( substr($this->data,
+                                                                        $start,
+                                                                        $this->offset - $start - 1) ));
+    }
+
+
+    /**
+     * Read array PDF object
+     * Also reads trailing ']' from a pdf stream
+     *
+     * Expects the opening '[' to be consumed already and reads elements until
+     * the matching ']' is met.
+     *
+     * @return Zend_Pdf_Element_Array
+     * @throws Zend_Pdf_Exception  when the data ends before ']' is found
+     */
+    private function _readArray()
+    {
+        $items = array();
+
+        for (;;) {
+            $lexeme = $this->readLexeme();
+
+            if (strlen($lexeme) == 0) {
+                break; // end of data, the array was never closed
+            }
+
+            if ($lexeme == ']') {
+                return new Zend_Pdf_Element_Array($items);
+            }
+
+            $items[] = $this->readElement($lexeme);
+        }
+
+        require_once 'Zend/Pdf/Exception.php';
+        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while array reading. Offset - 0x%X. \']\' expected.', $this->offset));
+    }
+
+
+    /**
+     * Read dictionary PDF object
+     * Also reads trailing '>>' from a pdf stream
+     *
+     * Expects the opening '<<' to be consumed already and reads name/value
+     * pairs until the matching '>>' is met.
+     *
+     * @return Zend_Pdf_Element_Dictionary
+     * @throws Zend_Pdf_Exception  when a key is not a name object, or when the
+     *                             data ends before '>>' is found
+     */
+    private function _readDictionary()
+    {
+        $result = new Zend_Pdf_Element_Dictionary();
+
+        for (;;) {
+            $lexeme = $this->readLexeme();
+
+            if (strlen($lexeme) == 0) {
+                break; // end of data, the dictionary was never closed
+            }
+
+            if ($lexeme == '>>') {
+                return $result;
+            }
+
+            // Remember where the key started, for error reporting
+            $keyOffset = $this->offset - strlen($lexeme);
+
+            $key   = $this->readElement($lexeme);
+            $entry = $this->readElement();
+
+            if (!($key instanceof Zend_Pdf_Element_Name)) {
+                require_once 'Zend/Pdf/Exception.php';
+                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Name object expected while dictionary reading. Offset - 0x%X.', $keyOffset));
+            }
+
+            $result->add($key, $entry);
+        }
+
+        require_once 'Zend/Pdf/Exception.php';
+        throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Unexpected end of file while dictionary reading. Offset - 0x%X. \'>>\' expected.', $this->offset));
+    }
+
+
+    /**
+     * Read reference PDF object
+     *
+     * A reference has the form "<object number> <generation number> R".
+     * When the lexemes at the current position do not match that form, the
+     * offset is rewound to where it was on entry and null is returned.
+     *
+     * @param string $nextLexeme  first lexeme (the object number) if the caller
+     *                            has already read it, null to read it here
+     * @return Zend_Pdf_Element_Reference|null
+     */
+    private function _readReference($nextLexeme = null)
+    {
+        $rewindTo = $this->offset;
+
+        $objNum = ($nextLexeme === null) ? $this->readLexeme() : $nextLexeme;
+
+        if (ctype_digit($objNum)) {
+            $genNum = $this->readLexeme();
+
+            // The 'R' marker is only looked for after a valid generation number
+            if (ctype_digit($genNum)  &&  $this->readLexeme() == 'R') {
+                return new Zend_Pdf_Element_Reference((int)$objNum, (int)$genNum, $this->_context, $this->_objFactory->resolve());
+            }
+        }
+
+        // it's not a reference
+        $this->offset = $rewindTo;
+
+        return null;
+    }
+
+
+    /**
+     * Read numeric PDF object
+     *
+     * @param string $nextLexeme  lexeme to convert if the caller has already
+     *                            read it, null to read the next one from the data
+     * @return Zend_Pdf_Element_Numeric
+     */
+    private function _readNumeric($nextLexeme = null)
+    {
+        $lexeme = ($nextLexeme === null) ? $this->readLexeme() : $nextLexeme;
+
+        return new Zend_Pdf_Element_Numeric($lexeme);
+    }
+
+
+    /**
+     * Read indirect object from a PDF stream
+     *
+     * Parses "<num> <gen> obj <value> endobj" or, for stream objects,
+     * "<num> <gen> obj <dictionary> stream <EOL> <data> endstream endobj"
+     * starting at $offset. Every element created while reading the value gets
+     * the resulting object assigned as its parent object.
+     *
+     * The parser offset that was current on entry is restored before a
+     * successful return (it is not restored when an exception is thrown).
+     *
+     * @param integer $offset  position of the object in the data; null yields
+     *                         a Zend_Pdf_Element_Null
+     * @param Zend_Pdf_Element_Reference_Context $context
+     * @return Zend_Pdf_Element_Object
+     * @throws Zend_Pdf_Exception  on any syntax error in the object definition
+     */
+    public function getObject($offset, Zend_Pdf_Element_Reference_Context $context)
+    {
+        if ($offset === null ) {
+            return new Zend_Pdf_Element_Null();
+        }
+
+        // Save current offset to make getObject() reentrant
+        $offsetSave = $this->offset;
+
+        $this->offset    = $offset;
+        $this->_context  = $context;
+        $this->_elements = array();
+
+        $objNum = $this->readLexeme();
+        if (!ctype_digit($objNum)) {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object number expected.', $this->offset - strlen($objNum)));
+        }
+
+        $genNum = $this->readLexeme();
+        if (!ctype_digit($genNum)) {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Object generation number expected.', $this->offset - strlen($genNum)));
+        }
+
+        $objKeyword = $this->readLexeme();
+        if ($objKeyword != 'obj') {
+            require_once 'Zend/Pdf/Exception.php';
+            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'obj\' keyword expected.', $this->offset - strlen($objKeyword)));
+        }
+
+        $objValue = $this->readElement();
+
+        // Snapshot the elements collected for THIS object right away.
+        // Dereferencing the stream length below may re-enter getObject()
+        // (NOTE(review): presumably through the reference context - confirm),
+        // and a nested call resets $this->_elements.
+        $elements = $this->_elements;
+
+        $nextLexeme = $this->readLexeme();
+
+        if ($nextLexeme == 'endobj') {
+            /**
+             * Object is not generated by factory (thus it's not marked as modified object).
+             * But factory is assigned to the object.
+             */
+            $obj = new Zend_Pdf_Element_Object($objValue, (int)$objNum, (int)$genNum, $this->_objFactory->resolve());
+        } else {
+            /**
+             * It's a stream object
+             */
+            if ($nextLexeme != 'stream') {
+                require_once 'Zend/Pdf/Exception.php';
+                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' or \'stream\' keywords expected.', $this->offset - strlen($nextLexeme)));
+            }
+
+            if (!$objValue instanceof Zend_Pdf_Element_Dictionary) {
+                require_once 'Zend/Pdf/Exception.php';
+                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Stream extent must be preceded by stream dictionary.', $this->offset - strlen($nextLexeme)));
+            }
+
+            /**
+             * References are automatically dereferenced at this moment.
+             */
+            $streamLength = $objValue->Length->value;
+
+            /**
+             * 'stream' keyword must be followed by either cr-lf sequence or lf character only.
+             * This restriction gives the possibility to recognize all cases exactly.
+             *
+             * substr() is used instead of direct indexing so that a 'stream'
+             * keyword at the very end of the data ends up in the exception
+             * below instead of reading past the end of the string.
+             */
+            if (substr($this->data, $this->offset, 2) === "\r\n") {
+                $this->offset += 2;
+            } else if (substr($this->data, $this->offset, 1) === "\n") {
+                $this->offset++;
+            } else {
+                require_once 'Zend/Pdf/Exception.php';
+                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'stream\' must be followed by either cr-lf sequence or lf character only.', $this->offset - strlen($nextLexeme)));
+            }
+
+            $dataOffset = $this->offset;
+
+            // Jump over the raw stream bytes; they must not be tokenized
+            $this->offset += $streamLength;
+
+            $nextLexeme = $this->readLexeme();
+            if ($nextLexeme != 'endstream') {
+                require_once 'Zend/Pdf/Exception.php';
+                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endstream\' keyword expected.', $this->offset - strlen($nextLexeme)));
+            }
+
+            $nextLexeme = $this->readLexeme();
+            if ($nextLexeme != 'endobj') {
+                require_once 'Zend/Pdf/Exception.php';
+                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. \'endobj\' keyword expected.', $this->offset - strlen($nextLexeme)));
+            }
+
+            $obj = new Zend_Pdf_Element_Object_Stream(substr($this->data,
+                                                             $dataOffset,
+                                                             $streamLength),
+                                                      (int)$objNum,
+                                                      (int)$genNum,
+                                                      $this->_objFactory->resolve(),
+                                                      $objValue);
+        }
+
+        foreach ($elements as $element) {
+            $element->setParentObject($obj);
+        }
+
+        // Restore offset value
+        $this->offset = $offsetSave;
+
+        return $obj;
+    }
+
+
+    /**
+     * Get length of source string
+     *
+     * @return integer  size of the source data in bytes
+     */
+    public function getLength()
+    {
+        $length = strlen($this->data);
+
+        return $length;
+    }
+
+    /**
+     * Get source string
+     *
+     * @return string  the complete source data the parser works on
+     */
+    public function getString()
+    {
+        $source = $this->data;
+
+        return $source;
+    }
+
+
+    /**
+     * Parse integer value from a binary stream
+     *
+     * Interprets $size consecutive bytes of $stream, starting at $offset, as
+     * an unsigned integer with the most significant byte first.
+     *
+     * @param string $stream
+     * @param integer $offset  position of the first (most significant) byte
+     * @param integer $size    number of bytes to read
+     * @return integer
+     */
+    public static function parseIntFromStream($stream, $offset, $size)
+    {
+        $result = 0;
+        $end    = $offset + $size;
+
+        for ($position = $offset; $position < $end; $position++) {
+            // Shift the accumulated value up by one byte and append the next one
+            $result = $result * 256 + ord($stream[$position]);
+        }
+
+        return $result;
+    }
+
+
+
+    /**
+     * Set current context
+     *
+     * Replaces the reference context that is handed to the reference
+     * elements this parser creates.
+     *
+     * @param Zend_Pdf_Element_Reference_Context $context
+     */
+    public function setContext(Zend_Pdf_Element_Reference_Context $context)
+    {
+        $this->_context = $context;
+    }
+
+    /**
+     * Object constructor
+     *
+     * Note: PHP duplicates a string that is passed by value only if it is
+     * modified, so keeping the source here causes no copying overhead.
+     *
+     * @param string $source  PDF data to parse
+     * @param Zend_Pdf_ElementFactory_Interface $factory  factory assigned to the
+     *                                                    objects the parser creates
+     */
+    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory)
+    {
+        $this->_objFactory = $factory;
+        $this->data        = $source;
+    }
+}