|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Pdf |
|
17 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
18 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
19 * @version $Id: Parser.php 23395 2010-11-19 15:30:47Z alexander $ |
|
20 */ |
|
21 |
|
22 /** Internally used classes */ |
|
23 require_once 'Zend/Pdf/Element.php'; |
|
24 require_once 'Zend/Pdf/Element/Numeric.php'; |
|
25 |
|
26 |
|
27 /** Zend_Pdf_StringParser */ |
|
28 require_once 'Zend/Pdf/StringParser.php'; |
|
29 |
|
30 |
|
/**
 * PDF file parser
 *
 * Wraps a Zend_Pdf_StringParser around the document data, checks the '%PDF-'
 * header comment, finds the 'startxref' pointer in front of the trailing
 * '%%EOF' marker and loads the chain of cross-reference sections (following
 * the trailer '/Prev' entries) that the pointer leads to.
 *
 * @package    Zend_Pdf
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Pdf_Parser
{
    /**
     * String parser holding the document data and the current read offset
     *
     * @var Zend_Pdf_StringParser
     */
    private $_stringParser;

    /**
     * Last PDF file trailer
     *
     * @var Zend_Pdf_Trailer_Keeper
     */
    private $_trailer;

    /**
     * PDF version specified in the file header
     *
     * @var string
     */
    private $_pdfVersion;


    /**
     * Get length of source PDF
     *
     * @return integer  length in bytes of the data held by the string parser
     */
    public function getPDFLength()
    {
        return strlen($this->_stringParser->data);
    }

    /**
     * Get PDF String
     *
     * @return string  the data held by the string parser
     */
    public function getPDFString()
    {
        return $this->_stringParser->data;
    }

    /**
     * PDF version specified in the file header
     *
     * @return string  the text that follows '%PDF-' in the header comment
     */
    public function getPDFVersion()
    {
        return $this->_pdfVersion;
    }

    /**
     * Load XReference table and referenced objects
     *
     * Positions the string parser at $offset, installs a fresh reference
     * table/context on it and reads either a classic 'xref' table or a
     * cross-reference stream (the latter is currently always rejected).
     * When the trailer dictionary has a numeric or reference '/Prev' entry,
     * the previous section is loaded recursively and its reference table
     * becomes the parent of the current one.
     *
     * NOTE(review): a document whose '/Prev' entries form a cycle would recurse
     * without bound; nothing in this class guards against that.
     *
     * @param integer $offset  offset of the cross-reference section in the data
     * @throws Zend_Pdf_Exception
     * @return Zend_Pdf_Trailer_Keeper
     */
    private function _loadXRefTable($offset)
    {
        $this->_stringParser->offset = $offset;

        require_once 'Zend/Pdf/Element/Reference/Table.php';
        $refTable = new Zend_Pdf_Element_Reference_Table();
        require_once 'Zend/Pdf/Element/Reference/Context.php';
        $context  = new Zend_Pdf_Element_Reference_Context($this->_stringParser, $refTable);
        $this->_stringParser->setContext($context);

        if ($this->_stringParser->readLexeme() == 'xref') {
            // Common cross-reference table
            $trailerDict = $this->_readXRefSections($refTable);
        } else {
            // Cross-reference stream. The helper always ends by throwing, so
            // the code below is only reached for classic tables.
            $trailerDict = $this->_readXRefStream($offset, $context, $refTable);
        }

        require_once 'Zend/Pdf/Trailer/Keeper.php';
        $trailerObj = new Zend_Pdf_Trailer_Keeper($trailerDict, $context);
        if ($trailerDict->Prev instanceof Zend_Pdf_Element_Numeric ||
            $trailerDict->Prev instanceof Zend_Pdf_Element_Reference ) {
            $trailerObj->setPrev($this->_loadXRefTable($trailerDict->Prev->value));
            $context->getRefTable()->setParent($trailerObj->getPrev()->getRefTable());
        }

        /**
         * We set '/Prev' dictionary property to the current cross-reference section offset.
         * It doesn't correspond to the actual data, but is true when trailer will be used
         * as a trailer for next generated PDF section.
         */
        $trailerObj->Prev = new Zend_Pdf_Element_Numeric($offset);

        return $trailerObj;
    }

    /**
     * Read the subsections of a classic cross-reference table and its trailer
     *
     * Expects the string parser to be positioned right after the 'xref'
     * keyword. Every subsection header ("first object number" and "entry
     * count") is followed by that many fixed-width entries; reading stops at
     * the 'trailer' keyword, after which the trailer dictionary is read.
     *
     * @param Zend_Pdf_Element_Reference_Table $refTable  table receiving the entries
     * @throws Zend_Pdf_Exception
     * @return Zend_Pdf_Element_Dictionary  the trailer dictionary
     */
    private function _readXRefSections($refTable)
    {
        $parser = $this->_stringParser;

        $parser->skipWhiteSpace();
        while ( ($nextLexeme = $parser->readLexeme()) != 'trailer' ) {
            if (!ctype_digit($nextLexeme)) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset - strlen($nextLexeme)));
            }
            $objNum = (int)$nextLexeme;

            $refCount = $parser->readLexeme();
            if (!ctype_digit($refCount)) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference table subheader values must contain only digits.', $parser->offset - strlen($refCount)));
            }

            $parser->skipWhiteSpace();
            for ($remaining = (int)$refCount; $remaining > 0; $remaining--) {
                $this->_readXRefEntry($refTable, $objNum);
                $objNum++;
            }
        }

        $trailerDictOffset = $parser->offset;
        $trailerDict       = $parser->readElement();
        if (!$trailerDict instanceof Zend_Pdf_Element_Dictionary) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Dictionary expected after \'trailer\' keyword.', $trailerDictOffset));
        }

        return $trailerDict;
    }

    /**
     * Read one fixed-width cross-reference table entry and register it
     *
     * Entry layout as consumed here: 10-digit offset, separator, 5-digit
     * generation number, separator, one key character ('f' - free entry,
     * 'n' - in-use entry), two white space characters. Entries with any other
     * key character are consumed but not added to the table.
     *
     * @param Zend_Pdf_Element_Reference_Table $refTable  table receiving the entry
     * @param integer $objNum  object number the entry belongs to
     * @throws Zend_Pdf_Exception
     */
    private function _readXRefEntry($refTable, $objNum)
    {
        $parser = $this->_stringParser;

        $objectOffset = $this->_readXRefEntryNumber(10, 'Offset');
        $this->_skipXRefFieldSeparator();
        $genNumber    = $this->_readXRefEntryNumber(5, 'Generation number');
        $this->_skipXRefFieldSeparator();

        $inUseKey = $parser->data[$parser->offset];
        $parser->offset++;

        $reference = $objNum . ' ' . $genNumber . ' R';
        switch ($inUseKey) {
            case 'f':
                // free entry
                $refTable->addReference($reference, $objectOffset, false);
                break;

            case 'n':
                // in-use entry
                $refTable->addReference($reference, $objectOffset, true);
                break;
        }

        // The entry is terminated by two white space characters
        for ($count = 0; $count < 2; $count++) {
            if ( !Zend_Pdf_StringParser::isWhiteSpace(ord( $parser->data[$parser->offset] )) ) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $parser->offset));
            }
            $parser->offset++;
        }
    }

    /**
     * Read a fixed-width, zero-padded decimal field of a cross-reference entry
     *
     * The field must consist of exactly $width digits; a shorter slice (data
     * truncated in the middle of an entry) is rejected as well. On success the
     * parser offset is advanced past the field.
     *
     * @param integer $width      number of digits the field occupies
     * @param string  $fieldName  field name used at the start of the error detail
     * @throws Zend_Pdf_Exception
     * @return string  the digits without leading zeros (at least one digit is kept)
     */
    private function _readXRefEntryNumber($width, $fieldName)
    {
        $parser = $this->_stringParser;

        $digits = substr($parser->data, $parser->offset, $width);
        if (strlen($digits) != $width || !ctype_digit($digits)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. %s must contain only digits.', $parser->offset, $fieldName));
        }
        $parser->offset += $width;

        // Leading zeros are stripped to force the value to be treated as
        // a decimal instead of an octal number
        $number = ltrim($digits, '0');

        return ($number === '') ? '0' : $number;
    }

    /**
     * Consume the single white space character separating two entry fields
     *
     * @throws Zend_Pdf_Exception  if the current character is not white space
     */
    private function _skipXRefFieldSeparator()
    {
        $parser = $this->_stringParser;

        if (strpos("\x00\t\n\f\r ", $parser->data[$parser->offset]) === false) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file cross-reference table syntax error. Offset - 0x%X. Value separator must be white space.', $parser->offset));
        }
        $parser->offset++;
    }

    /**
     * Read a cross-reference stream
     *
     * Validates the stream object and its dictionary ('/Type', '/W', '/Index')
     * and walks all entries, but cross-reference streams are not supported
     * yet: once the walk is complete the method throws unconditionally. It
     * therefore never returns normally; malformed streams are reported with
     * their specific syntax error before the generic one.
     *
     * @param integer $offset  offset of the cross-reference stream object
     * @param Zend_Pdf_Element_Reference_Context $context
     * @param Zend_Pdf_Element_Reference_Table $refTable
     * @throws Zend_Pdf_Exception  always
     */
    private function _readXRefStream($offset, $context, $refTable)
    {
        $xrefStream = $this->_stringParser->getObject($offset, $context);

        if (!$xrefStream instanceof Zend_Pdf_Element_Object_Stream) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream expected.', $offset));
        }

        $streamDict = $xrefStream->dictionary;
        if ($streamDict->Type->value != 'XRef') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross-reference stream object must have /Type property assigned to /XRef.', $offset));
        }
        if ($streamDict->W === null || $streamDict->W->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary doesn\'t have W entry or it\'s not an array.', $offset));
        }

        // Byte widths of the three fields of every stream entry
        $entryField1Size = $streamDict->W->items[0]->value;
        $entryField2Size = $streamDict->W->items[1]->value;
        $entryField3Size = $streamDict->W->items[2]->value;

        if ($entryField2Size == 0 || $entryField3Size == 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Wrong W dictionary entry. Only type field of stream entries has default value and could be zero length.', $offset));
        }

        $xrefStreamData = $xrefStream->value;

        if ($streamDict->Index !== null) {
            if ($streamDict->Index->getType() != Zend_Pdf_Element::TYPE_ARRAY) {
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception(sprintf('PDF file syntax error. Offset - 0x%X. Cross reference stream dictionary Index entry must be an array.', $offset));
            }
            // '/Index' holds (first object number, entry count) pairs
            $sections = count($streamDict->Index->items)/2;
        } else {
            $sections = 1;
        }

        $streamOffset = 0;

        for ($count = 0; $count < $sections; $count++) {
            if ($streamDict->Index !== null) {
                $objNum  = $streamDict->Index->items[$count*2    ]->value;
                $entries = $streamDict->Index->items[$count*2 + 1]->value;
            } else {
                $objNum  = 0;
                $entries = $streamDict->Size->value;
            }

            for ($count2 = 0; $count2 < $entries; $count2++) {
                if ($entryField1Size == 0) {
                    // A zero-width type field defaults to type 1
                    $type = 1;
                } else if ($entryField1Size == 1) { // Optimize one-byte field case
                    $type = ord($xrefStreamData[$streamOffset++]);
                } else {
                    $type = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField1Size);
                    $streamOffset += $entryField1Size;
                }

                if ($entryField2Size == 1) { // Optimize one-byte field case
                    $field2 = ord($xrefStreamData[$streamOffset++]);
                } else {
                    $field2 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField2Size);
                    $streamOffset += $entryField2Size;
                }

                if ($entryField3Size == 1) { // Optimize one-byte field case
                    $field3 = ord($xrefStreamData[$streamOffset++]);
                } else {
                    $field3 = Zend_Pdf_StringParser::parseIntFromStream($xrefStreamData, $streamOffset, $entryField3Size);
                    $streamOffset += $entryField3Size;
                }

                switch ($type) {
                    case 0:
                        // Free object: "$objNum $field3 R", next free - $field2
                        $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, false);
                        break;

                    case 1:
                        // In-use object: "$objNum $field3 R", offset - $field2
                        $refTable->addReference($objNum . ' ' . $field3 . ' R', $field2, true);
                        break;

                    case 2:
                        // Object in an object stream: "$objNum 0 R",
                        // object stream - "$field2 0 R", offset - $field3.
                        // Not registered.
                        break;
                }

                $objNum++;
            }
        }

        require_once 'Zend/Pdf/Exception.php';
        throw new Zend_Pdf_Exception('Cross-reference streams are not supported yet.');
    }


    /**
     * Get Trailer object
     *
     * @return Zend_Pdf_Trailer_Keeper  trailer loaded by the constructor
     */
    public function getTrailer()
    {
        return $this->_trailer;
    }

    /**
     * Object constructor
     *
     * Note: PHP duplicates string, which is sent by value, only if it's updated.
     * Thus we don't need to care about overhead
     *
     * @param mixed $source  file name when $load is true, PDF data string otherwise
     * @param Zend_Pdf_ElementFactory_Interface $factory
     * @param boolean $load  whether $source has to be read from disk
     * @throws Zend_Exception
     */
    public function __construct($source, Zend_Pdf_ElementFactory_Interface $factory, $load)
    {
        if ($load) {
            $this->_stringParser = new Zend_Pdf_StringParser($this->_readFile($source), $factory);
        } else {
            $this->_stringParser = new Zend_Pdf_StringParser($source, $factory);
        }
        $parser = $this->_stringParser;

        $pdfVersionComment = $parser->readComment();
        if (substr($pdfVersionComment, 0, 5) != '%PDF-') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('File is not a PDF.');
        }

        $pdfVersion = substr($pdfVersionComment, 5);
        if (version_compare($pdfVersion, '0.9',  '<')  ||
            version_compare($pdfVersion, '1.61', '>=')
           ) {
            /**
             * @todo
             * To support PDF versions 1.5 (Acrobat 6) and PDF version 1.7 (Acrobat 7)
             * Stream compression filter must be implemented (for compressed object streams).
             * Cross reference streams must be implemented
             */
            require_once 'Zend/Pdf/Exception.php';
            // The version is reported verbatim ('%s'): it is a string and may
            // not be a valid number at all.
            throw new Zend_Pdf_Exception(sprintf('Unsupported PDF version. Zend_Pdf supports PDF 1.0-1.4. Current version - \'%s\'', $pdfVersion));
        }
        $this->_pdfVersion = $pdfVersion;

        // The marker may be followed by at most two more characters
        $eofOffset = strrpos($parser->data, '%%EOF');
        if ($eofOffset === false ||
            strlen($parser->data) - $eofOffset > 7) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception('Pdf file syntax error. End-of-fle marker expected at the end of file.');
        }

        $parser->offset = $eofOffset - 1;
        // Go to end of cross-reference table offset
        $this->_stepBackWhile(true);
        // Go to the start of cross-reference table offset
        $this->_stepBackWhile(false);
        // Go to the end of 'startxref' keyword
        $this->_stepBackWhile(true);
        // Go to the white space (eol marker) before 'startxref' keyword
        $parser->offset -= 9;

        $nextLexeme = $parser->readLexeme();
        if ($nextLexeme != 'startxref') {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. \'startxref\' keyword expected. Offset - 0x%X.', $parser->offset - strlen($nextLexeme)));
        }

        $startXref = $parser->readLexeme();
        if (!ctype_digit($startXref)) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception(sprintf('Pdf file syntax error. Cross-reference table offset must contain only digits. Offset - 0x%X.', $parser->offset - strlen($startXref)));
        }

        $this->_trailer = $this->_loadXRefTable($startXref);
        $factory->setObjectCount($this->_trailer->Size->value);
    }

    /**
     * Read the whole file into a string
     *
     * The file handle is closed on every path, including the failure ones.
     *
     * @param string $source  name of the file to read
     * @throws Zend_Pdf_Exception  if the file can't be opened or fully read
     * @return string
     */
    private function _readFile($source)
    {
        if (($pdfFile = @fopen($source, 'rb')) === false ) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception( "Can not open '$source' file for reading." );
        }

        $data      = '';
        $byteCount = filesize($source);
        while ($byteCount > 0 && !feof($pdfFile)) {
            $nextBlock = fread($pdfFile, $byteCount);
            if ($nextBlock === false) {
                fclose($pdfFile);
                require_once 'Zend/Pdf/Exception.php';
                throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
            }

            $data      .= $nextBlock;
            $byteCount -= strlen($nextBlock);
        }
        fclose($pdfFile);

        // Fewer bytes than filesize() reported were available
        if ($byteCount != 0) {
            require_once 'Zend/Pdf/Exception.php';
            throw new Zend_Pdf_Exception( "Error occured while '$source' file reading." );
        }

        return $data;
    }

    /**
     * Move the parser offset backwards over a run of characters
     *
     * Steps back while the offset is positive and the character under it is
     * white space ($whiteSpace is true) or is not white space ($whiteSpace is
     * false).
     *
     * @param boolean $whiteSpace  kind of characters to step over
     */
    private function _stepBackWhile($whiteSpace)
    {
        $parser = $this->_stringParser;

        while ($parser->offset > 0 &&
               (bool)Zend_Pdf_StringParser::isWhiteSpace( ord($parser->data[$parser->offset]) ) === $whiteSpace) {
            $parser->offset--;
        }
    }


    /**
     * Object destructor
     *
     * Asks the string parser to clean up.
     */
    public function __destruct()
    {
        $this->_stringParser->cleanUp();
    }
}