|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @subpackage Document |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: Pptx.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
21 */ |
|
22 |
|
23 |
|
24 /** Zend_Search_Lucene_Document_OpenXml */ |
|
25 require_once 'Zend/Search/Lucene/Document/OpenXml.php'; |
|
26 |
|
/**
 * Pptx document.
 *
 * Builds a Lucene document from a PresentationML (.pptx) package: the text of
 * every slide and of its notes becomes the 'body' field, and the package's
 * core properties become additional fields.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Document_Pptx extends Zend_Search_Lucene_Document_OpenXml
{
    /**
     * Xml Schema - PresentationML
     *
     * @var string
     */
    const SCHEMA_PRESENTATIONML = 'http://schemas.openxmlformats.org/presentationml/2006/main';

    /**
     * Xml Schema - DrawingML
     *
     * @var string
     */
    const SCHEMA_DRAWINGML = 'http://schemas.openxmlformats.org/drawingml/2006/main';

    /**
     * Xml Schema - Slide relation
     *
     * @var string
     */
    const SCHEMA_SLIDERELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/slide';

    /**
     * Xml Schema - Slide notes relation
     *
     * @var string
     */
    const SCHEMA_SLIDENOTESRELATION = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/notesSlide';

    /**
     * Object constructor
     *
     * Opens the package, follows the officeDocument relationship to the
     * presentation part, collects every slide (and the first notes part of
     * each slide), extracts all DrawingML text runs and registers the
     * 'filename', 'body', core property and 'title' fields.
     *
     * Slides, notes or relationship parts that are missing or cannot be parsed
     * are skipped; only an unreadable archive or an unreadable root
     * relationship part is treated as fatal.
     *
     * @param string  $fileName     Path of the .pptx file to read
     * @param boolean $storeContent Whether the body text is stored (Text field)
     *                              or only indexed (UnStored field)
     * @throws Zend_Search_Lucene_Exception If the Zip extension is not loaded,
     *                                      or the archive / its root
     *                                      relationships cannot be read
     */
    private function __construct($fileName, $storeContent)
    {
        if (!class_exists('ZipArchive', false)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('MS Office documents processing functionality requires Zip extension to be loaded');
        }

        // Document data holders
        $slides       = array();
        $slideNotes   = array();
        $documentBody = array();

        // Open OpenXML package. ZipArchive::open() returns TRUE on success and
        // an error code otherwise, so anything but TRUE means "not readable".
        $package = new ZipArchive();
        if ($package->open($fileName) !== true) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
        }

        // Read relations and search for officeDocument
        $relationsXml = $package->getFromName('_rels/.rels');
        $relations    = ($relationsXml === false)
                      ? false
                      : simplexml_load_string($relationsXml, 'SimpleXMLElement', LIBXML_NONET);
        if ($relations === false) {
            $package->close();
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.');
        }

        foreach ($relations->Relationship as $rel) {
            if ((string)$rel['Type'] !== Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) {
                continue;
            }

            // Found office document! Search for slides...
            $documentTarget = (string)$rel['Target'];
            $documentDir    = dirname($documentTarget);

            $slideRelations = $this->_loadXmlPart(
                $package,
                $documentDir . '/_rels/' . basename($documentTarget) . '.rels'
            );

            if ($slideRelations !== null) {
                foreach ($slideRelations->Relationship as $slideRel) {
                    if ((string)$slideRel['Type'] !== self::SCHEMA_SLIDERELATION) {
                        continue;
                    }

                    // Found slide! Slides and their notes share one key (the
                    // relationship id without its 'rId' prefix) so they can be
                    // paired again after sorting.
                    $slideTarget = (string)$slideRel['Target'];
                    $slideDir    = $documentDir . '/' . dirname($slideTarget);
                    $slideKey    = str_replace('rId', '', (string)$slideRel['Id']);

                    $slide = $this->_loadXmlPart($package, $slideDir . '/' . basename($slideTarget));
                    if ($slide === null) {
                        // Unreadable slide: skip it instead of failing later
                        // when its text is extracted.
                        continue;
                    }
                    $slides[$slideKey] = $slide;

                    // Search for slide notes
                    $slideNotesRelations = $this->_loadXmlPart(
                        $package,
                        $slideDir . '/_rels/' . basename($slideTarget) . '.rels'
                    );
                    if ($slideNotesRelations === null) {
                        continue;
                    }

                    foreach ($slideNotesRelations->Relationship as $slideNoteRel) {
                        if ((string)$slideNoteRel['Type'] !== self::SCHEMA_SLIDENOTESRELATION) {
                            continue;
                        }

                        // Found slide notes!
                        $noteTarget = (string)$slideNoteRel['Target'];
                        $slideNote  = $this->_loadXmlPart(
                            $package,
                            $slideDir . '/' . dirname($noteTarget) . '/' . basename($noteTarget)
                        );
                        if ($slideNote !== null) {
                            $slideNotes[$slideKey] = $slideNote;
                        }

                        // Only the first notes relationship of a slide is used.
                        break;
                    }
                }
            }

            // Only the first officeDocument relationship is processed.
            break;
        }

        // Sort slides
        ksort($slides);
        ksort($slideNotes);

        // Extract contents from slides, each followed by its notes (if any)
        foreach ($slides as $slideKey => $slide) {
            $documentBody = array_merge($documentBody, $this->_extractText($slide));

            if (isset($slideNotes[$slideKey])) {
                $documentBody = array_merge($documentBody, $this->_extractText($slideNotes[$slideKey]));
            }
        }

        // Read core properties
        $coreProperties = $this->extractMetaData($package);

        // Close file
        $package->close();

        // Store filename
        $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8'));

        // Store contents
        $bodyText = implode(' ', $documentBody);
        if ($storeContent) {
            $this->addField(Zend_Search_Lucene_Field::Text('body', $bodyText, 'UTF-8'));
        } else {
            $this->addField(Zend_Search_Lucene_Field::UnStored('body', $bodyText, 'UTF-8'));
        }

        // Store meta data properties
        foreach ($coreProperties as $key => $value) {
            $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8'));
        }

        // Store title (if not present in meta data)
        if (!isset($coreProperties['title'])) {
            $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8'));
        }
    }

    /**
     * Read one XML part from the package and parse it.
     *
     * The path is passed through absoluteZipPath() before the lookup.
     *
     * @param ZipArchive $package Opened OpenXML package
     * @param string     $path    Part path inside the package
     * @return SimpleXMLElement|null Parsed part, or null if the part is missing
     *                               or is not well-formed XML
     */
    private function _loadXmlPart(ZipArchive $package, $path)
    {
        $xml = $package->getFromName($this->absoluteZipPath($path));
        if ($xml === false) {
            return null;
        }

        // The package content is untrusted input; LIBXML_NONET disables
        // network access while the part is being loaded.
        $part = simplexml_load_string($xml, 'SimpleXMLElement', LIBXML_NONET);

        return ($part === false) ? null : $part;
    }

    /**
     * Collect the text of every DrawingML text run (a:t) found in a part.
     *
     * @param SimpleXMLElement $part Parsed slide or slide notes part
     * @return array List of strings in document order; empty if nothing matched
     */
    private function _extractText(SimpleXMLElement $part)
    {
        // Register namespaces
        $part->registerXPathNamespace('p', self::SCHEMA_PRESENTATIONML);
        $part->registerXPathNamespace('a', self::SCHEMA_DRAWINGML);

        // Fetch all text; xpath() does not return an array on failure
        $textElements = $part->xpath('//a:t');
        if (!$textElements) {
            return array();
        }

        $texts = array();
        foreach ($textElements as $textElement) {
            $texts[] = (string)$textElement;
        }

        return $texts;
    }

    /**
     * Load Pptx document from a file
     *
     * @param string  $fileName     Path of the .pptx file to read
     * @param boolean $storeContent Whether the body text is stored in the index
     * @return Zend_Search_Lucene_Document_Pptx
     * @throws Zend_Search_Lucene_Exception
     */
    public static function loadPptxFile($fileName, $storeContent = false)
    {
        return new Zend_Search_Lucene_Document_Pptx($fileName, $storeContent);
    }
}