web/lib/Zend/Pdf/Parser.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Pdf
       
    17  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    18  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    19  * @version    $Id: Parser.php 23395 2010-11-19 15:30:47Z alexander $
       
    20  */
       
    21 
       
    22 /** Internally used classes */
       
    23 require_once 'Zend/Pdf/Element.php';
       
    24 require_once 'Zend/Pdf/Element/Numeric.php';
       
    25 
       
    26 
       
    27 /** Zend_Pdf_StringParser */
       
    28 require_once 'Zend/Pdf/StringParser.php';
       
    29 
       
    30 
       
    31 /**
       
    32  * PDF file parser
       
    33  *
       
    34  * @package    Zend_Pdf
       
    35  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    36  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    37  */
       
    38 class Zend_Pdf_Parser
       
    39 {
       
    40     /**
       
    41      * String parser
       
    42      *
       
    43      * @var Zend_Pdf_StringParser
       
    44      */
       
    45     private $_stringParser;
       
    46 
       
    47     /**
       
    48      * Last PDF file trailer
       
    49      *
       
    50      * @var Zend_Pdf_Trailer_Keeper
       
    51      */
       
    52     private $_trailer;
       
    53 
       
    54     /**
       
    55      * PDF version specified in the file header
       
    56      *
       
    57      * @var string
       
    58      */
       
    59     private $_pdfVersion;
       
    60 
       
    61 
       
    62     /**
       
    63      * Get length of source PDF
       
    64      *
       
    65      * @return integer
       
    66      */
       
    67     public function getPDFLength()
       
    68     {
       
    69         return strlen($this->_stringParser->data);
       
    70     }
       
    71 
       
    72     /**
       
    73      * Get PDF String
       
    74      *
       
    75      * @return string
       
    76      */
       
    77     public function getPDFString()
       
    78     {
       
    79         return $this->_stringParser->data;
       
    80     }
       
    81 
       
    82     /**
       
    83      * PDF version specified in the file header
       
    84      *
       
    85      * @return string
       
    86      */
       
    87     public function getPDFVersion()
       
    88     {
       
    89         return $this->_pdfVersion;
       
    90     }
       
    91 
       
    92     /**
       
    93      * Load XReference table and referenced objects
       
    94      *
       
    95      * @param integer $offset
       
    96      * @throws Zend_Pdf_Exception
       
    97      * @return Zend_Pdf_Trailer_Keeper
       
    98      */
       
    99     private function _loadXRefTable($offset)
       
   100     {
       
   101         $this->_stringParser->offset = $offset;
       
   102 
       
   103         require_once 'Zend/Pdf/Element/Reference/Table.php';
       
   104         $refTable = new Zend_Pdf_Element_Reference_Table();
       
   105         require_once 'Zend/Pdf/Element/Reference/Context.php';
       
   106         $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
       
   107         $this->_stringParser->setContext($context);
       
   108 
       
   109         $nextLexeme = $this->_stringParser->readLexeme();
       
   110         if ($nextLexeme == 'xref') {
       
   111             /**
       
   112              * Common cross-reference table
       
   113              */
       
   114             $this->_stringParser->skipWhiteSpace();
       
   115             while ( ($nextLexeme = $this->_stringParser->readLexeme()) != 'trailer' ) {
       
   116                 if (!ctype_digit($nextLexeme)) {
       
   117                     require_once 'Zend/Pdf/Exception.php';
       
   118                     throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($nextLexeme)));
       
   119                 }
       
   120                 $objNum = (int)$nextLexeme;
       
   121 
       
   122                 $refCount = $this->_stringParser->readLexeme();
       
   123                 if (!ctype_digit($refCount)) {
       
   124                     require_once 'Zend/Pdf/Exception.php';
       
   125                     throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $this->_stringParser->offset-strlen($refCount)));
       
   126                 }
       
   127 
       
   128                 $this->_stringParser->skipWhiteSpace();
       
   129                 while ($refCount > 0) {
       
   130                     $objectOffset = substr($this->_stringParser->data, $this->_stringParser->offset, 10);
       
   131                     if (!ctype_digit($objectOffset)) {
       
   132                         require_once 'Zend/Pdf/Exception.php';
       
   133                         throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset));
       
   134                     }
       
   135                     // Force $objectOffset to be treated as decimal instead of octal number
       
   136                     for ($numStart = 0; $numStart < strlen($objectOffset)-1; $numStart++) {
       
   137                         if ($objectOffset[$numStart] != '0') {
       
   138                             break;
       
   139                         }
       
   140                     }
       
   141                     $objectOffset = substr($objectOffset, $numStart);
       
   142                     $this->_stringParser->offset += 10;
       
   143 
       
   144                     if (strpos("\x00\t\n\f\r ", $this->_stringParser->data[$this->_stringParser->offset]) === false) {
       
   145                         require_once 'Zend/Pdf/Exception.php';
       
   146                         throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
       
   147                     }
       
   148                     $this->_stringParser->offset++;
       
   149 
       
   150                     $genNumber = substr($this->_stringParser->data, $this->_stringParser->offset, 5);
       
   151                     if (!ctype_digit($objectOffset)) {
       
   152                         require_once 'Zend/Pdf/Exception.php';
       
   153                         throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Offset must contain only digits.', $this->_stringParser->offset));
       
   154                     }
       
   155                     // Force $objectOffset to be treated as decimal instead of octal number
       
   156                     for ($numStart = 0; $numStart < strlen($genNumber)-1; $numStart++) {
       
   157                         if ($genNumber[$numStart] != '0') {
       
   158                             break;
       
   159                         }
       
   160                     }
       
   161                     $genNumber = substr($genNumber, $numStart);
       
   162                     $this->_stringParser->offset += 5;
       
   163 
       
   164                     if (strpos("\x00\t\n\f\r ", $this->_stringParser->data[$this->_stringParser->offset]) === false) {
       
   165                         require_once 'Zend/Pdf/Exception.php';
       
   166                         throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
       
   167                     }
       
   168                     $this->_stringParser->offset++;
       
   169 
       
   170                     $inUseKey = $this->_stringParser->data[$this->_stringParser->offset];
       
   171                     $this->_stringParser->offset++;
       
   172 
       
   173                     switch ($inUseKey) {
       
   174                         case 'f':
       
   175                             // free entry
       
   176                             unset( $this->_refTable[$objNum . ' ' . $genNumber . ' R'] );
       
   177                             $refTable->addReference($objNum . ' ' . $genNumber . ' R',
       
   178                                                     $objectOffset,
       
   179                                                     false);
       
   180                             break;
       
   181 
       
   182                         case 'n':
       
   183                             // in-use entry
       
   184 
       
   185                             $refTable->addReference($objNum . ' ' . $genNumber . ' R',
       
   186                                                     $objectOffset,
       
   187                                                     true);
       
   188                     }
       
   189 
       
   190                     if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
       
   191                         require_once 'Zend/Pdf/Exception.php';
       
   192                         throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
       
   193                     }
       
   194                     $this->_stringParser->offset++;
       
   195                     if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $this->_stringParser->data[$this->_stringParser->offset] )) ) {
       
   196                         require_once 'Zend/Pdf/Exception.php';
       
   197                         throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $this->_stringParser->offset));
       
   198                     }
       
   199                     $this->_stringParser->offset++;
       
   200 
       
   201                     $refCount--;
       
   202                     $objNum++;
       
   203                 }
       
   204             }
       
   205 
       
   206             $trailerDictOffset = $this->_stringParser->offset;
       
   207             $trailerDict = $this->_stringParser->readElement();
       
   208             if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
       
   209                 require_once 'Zend/Pdf/Exception.php';
       
   210                 throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
       
   211             }
       
   212         } else {
       
   213             $xrefStream = $this->_stringParser->getObject($offset, $context);
       
   214 
       
   215             if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
       
   216                 require_once 'Zend/Pdf/Exception.php';
       
   217                 throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Cross-reference stream expected.', $offset));
       
   218             }
       
   219 
       
   220             $trailerDict = $xrefStream->dictionary;
       
   221             if ($trailerDict->Type->value != 'XRef') {
       
   222                 require_once 'Zend/Pdf/Exception.php';
       
   223                 throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X.  Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
       
   224             }
       
   225             if ($trailerDict->W === null  || $trailerDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
       
   226                 require_once 'Zend/Pdf/Exception.php';
       
   227                 throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
       
   228             }
       
   229 
       
   230             $entryField1Size = $trailerDict->W->items[0]->value;
       
   231             $entryField2Size = $trailerDict->W->items[1]->value;
       
   232             $entryField3Size = $trailerDict->W->items[2]->value;
       
   233 
       
   234             if ($entryField2Size == 0 || $entryField3Size == 0) {
       
   235                 require_once 'Zend/Pdf/Exception.php';
       
   236                 throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
       
   237             }
       
   238 
       
   239             $xrefStreamData = $xrefStream->value;
       
   240 
       
   241             if ($trailerDict->Index !== null) {
       
   242                 if ($trailerDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
       
   243                     require_once 'Zend/Pdf/Exception.php';
       
   244                     throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
       
   245                 }
       
   246                 $sections = count($trailerDict->Index->items)/2;
       
   247             } else {
       
   248                 $sections = 1;
       
   249             }
       
   250 
       
   251             $streamOffset = 0;
       
   252 
       
   253             $size    = $entryField1Size + $entryField2Size + $entryField3Size;
       
   254             $entries = strlen($xrefStreamData)/$size;
       
   255 
       
   256             for ($count = 0; $count < $sections; $count++) {
       
   257                 if ($trailerDict->Index !== null) {
       
   258                     $objNum  = $trailerDict->Index->items[$count*2    ]->value;
       
   259                     $entries = $trailerDict->Index->items[$count*2 + 1]->value;
       
   260                 } else {
       
   261                     $objNum  = 0;
       
   262                     $entries = $trailerDict->Size->value;
       
   263                 }
       
   264 
       
   265                 for ($count2 = 0; $count2 < $entries; $count2++) {
       
   266                     if ($entryField1Size == 0) {
       
   267                         $type = 1;
       
   268                     } else if ($entryField1Size == 1) { // Optimyze one-byte field case
       
   269                         $type = ord($xrefStreamData[$streamOffset++]);
       
   270                     } else {
       
   271                         $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
       
   272                         $streamOffset += $entryField1Size;
       
   273                     }
       
   274 
       
   275                     if ($entryField2Size == 1) { // Optimyze one-byte field case
       
   276                         $field2 = ord($xrefStreamData[$streamOffset++]);
       
   277                     } else {
       
   278                         $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
       
   279                         $streamOffset += $entryField2Size;
       
   280                     }
       
   281 
       
   282                     if ($entryField3Size == 1) { // Optimyze one-byte field case
       
   283                         $field3 = ord($xrefStreamData[$streamOffset++]);
       
   284                     } else {
       
   285                         $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
       
   286                         $streamOffset += $entryField3Size;
       
   287                     }
       
   288 
       
   289                     switch ($type) {
       
   290                         case 0:
       
   291                             // Free object
       
   292                             $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
       
   293                             // Debug output:
       
   294                             // echo "Free object - $objNum $field3 R, next free - $field2\n";
       
   295                             break;
       
   296 
       
   297                         case 1:
       
   298                             // In use object
       
   299                             $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
       
   300                             // Debug output:
       
   301                             // echo "In-use object - $objNum $field3 R, offset - $field2\n";
       
   302                             break;
       
   303 
       
   304                         case 2:
       
   305                             // Object in an object stream
       
   306                             // Debug output:
       
   307                             // echo "Compressed object - $objNum 0 R, object stream - $field2 0 R, offset - $field3\n";
       
   308                             break;
       
   309                     }
       
   310 
       
   311                     $objNum++;
       
   312                 }
       
   313             }
       
   314 
       
   315             // $streamOffset . ' ' . strlen($xrefStreamData) . "\n";
       
   316             // "$entries\n";
       
   317             require_once 'Zend/Pdf/Exception.php';
       
   318             throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
       
   319         }
       
   320 
       
   321 
       
   322         require_once 'Zend/Pdf/Trailer/Keeper.php';
       
   323         $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
       
   324         if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
       
   325             $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
       
   326             $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
       
   327             $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
       
   328         }
       
   329 
       
   330         /**
       
   331          * We set '/Prev' dictionary property to the current cross-reference section offset.
       
   332          * It doesn't correspond to the actual data, but is true when trailer will be used
       
   333          * as a trailer for next generated PDF section.
       
   334          */
       
   335         $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);
       
   336 
       
   337         return $trailerObj;
       
   338     }
       
   339 
       
   340 
       
   341     /**
       
   342      * Get Trailer object
       
   343      *
       
   344      * @return Zend_Pdf_Trailer_Keeper
       
   345      */
       
   346     public function getTrailer()
       
   347     {
       
   348         return $this->_trailer;
       
   349     }
       
   350 
       
   351     /**
       
   352      * Object constructor
       
   353      *
       
   354      * Note: PHP duplicates string, which is sent by value, only of it's updated.
       
   355      * Thus we don't need to care about overhead
       
   356      *
       
   357      * @param mixed $source
       
   358      * @param Zend_Pdf_ElementFactory_Interface $factory
       
   359      * @param boolean $load
       
   360      * @throws Zend_Exception
       
   361      */
       
   362     public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
       
   363     {
       
   364         if ($load) {
       
   365             if (($pdfFile = @fopen($source, 'rb')) === false ) {
       
   366                 require_once 'Zend/Pdf/Exception.php';
       
   367                 throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
       
   368             }
       
   369 
       
   370             $data = '';
       
   371             $byteCount = filesize($source);
       
   372             while ($byteCount > 0 && !feof($pdfFile)) {
       
   373                 $nextBlock = fread($pdfFile, $byteCount);
       
   374                 if ($nextBlock === false) {
       
   375                     require_once 'Zend/Pdf/Exception.php';
       
   376                     throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
       
   377                 }
       
   378 
       
   379                 $data .= $nextBlock;
       
   380                 $byteCount -= strlen($nextBlock);
       
   381             }
       
   382             if ($byteCount != 0) {
       
   383                 require_once 'Zend/Pdf/Exception.php';
       
   384                 throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
       
   385             }
       
   386             fclose($pdfFile);
       
   387 
       
   388             $this->_stringParser = new Zend_Pdf_StringParser($data, $factory);
       
   389         } else {
       
   390             $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
       
   391         }
       
   392 
       
   393         $pdfVersionComment = $this->_stringParser->readComment();
       
   394         if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
       
   395             require_once 'Zend/Pdf/Exception.php';
       
   396             throw new Zend_Pdf_Exception('File is not a PDF.');
       
   397         }
       
   398 
       
   399         $pdfVersion = substr($pdfVersionComment, 5);
       
   400         if (version_compare($pdfVersion, '0.9',  '<')  ||
       
   401             version_compare($pdfVersion, '1.61', '>=')
       
   402            ) {
       
   403             /**
       
   404              * @todo
       
   405              * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
       
   406              * Stream compression filter must be implemented (for compressed object streams).
       
   407              * Cross reference streams must be implemented
       
   408              */
       
   409             require_once 'Zend/Pdf/Exception.php';
       
   410             throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%f\'', $pdfVersion));
       
   411         }
       
   412         $this->_pdfVersion = $pdfVersion;
       
   413 
       
   414         $this->_stringParser->offset = strrpos($this->_stringParser->data, '%%EOF');
       
   415         if ($this->_stringParser->offset === false ||
       
   416             strlen($this->_stringParser->data) - $this->_stringParser->offset > 7) {
       
   417             require_once 'Zend/Pdf/Exception.php';
       
   418             throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
       
   419         }
       
   420 
       
   421         $this->_stringParser->offset--;
       
   422         /**
       
   423          * Go to end of cross-reference table offset
       
   424          */
       
   425         while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
       
   426                ($this->_stringParser->offset > 0)) {
       
   427             $this->_stringParser->offset--;
       
   428         }
       
   429         /**
       
   430          * Go to the start of cross-reference table offset
       
   431          */
       
   432         while ( (!Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) ))&&
       
   433                ($this->_stringParser->offset > 0)) {
       
   434             $this->_stringParser->offset--;
       
   435         }
       
   436         /**
       
   437          * Go to the end of 'startxref' keyword
       
   438          */
       
   439         while (Zend_Pdf_StringParser::isWhiteSpace( ord($this->_stringParser->data[$this->_stringParser->offset]) )&&
       
   440                ($this->_stringParser->offset > 0)) {
       
   441             $this->_stringParser->offset--;
       
   442         }
       
   443         /**
       
   444          * Go to the white space (eol marker) before 'startxref' keyword
       
   445          */
       
   446         $this->_stringParser->offset -= 9;
       
   447 
       
   448         $nextLexeme = $this->_stringParser->readLexeme();
       
   449         if ($nextLexeme != 'startxref') {
       
   450             require_once 'Zend/Pdf/Exception.php';
       
   451             throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
       
   452         }
       
   453 
       
   454         $startXref = $this->_stringParser->readLexeme();
       
   455         if (!ctype_digit($startXref)) {
       
   456             require_once 'Zend/Pdf/Exception.php';
       
   457             throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $this->_stringParser->offset-strlen($nextLexeme)));
       
   458         }
       
   459 
       
   460         $this->_trailer = $this->_loadXRefTable($startXref);
       
   461         $factory->setObjectCount($this->_trailer->Size->value);
       
   462     }
       
   463 
       
   464 
       
   465     /**
       
   466      * Object destructor
       
   467      */
       
   468     public function __destruct()
       
   469     {
       
   470         $this->_stringParser->cleanUp();
       
   471     }
       
   472 }