diff -r bd595ad770fc -r 1c2f13fd785c web/enmi/Zend/Search/Lucene/Document/Pptx.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/enmi/Zend/Search/Lucene/Document/Pptx.php Thu Jan 20 19:30:54 2011 +0100 @@ -0,0 +1,200 @@ +open($fileName); + + // Read relations and search for officeDocument + $relationsXml = $package->getFromName('_rels/.rels'); + if ($relationsXml === false) { + require_once 'Zend/Search/Lucene/Exception.php'; + throw new Zend_Search_Lucene_Exception('Invalid archive or corrupted .pptx file.'); + } + $relations = simplexml_load_string($relationsXml); + foreach ($relations->Relationship as $rel) { + if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_OFFICEDOCUMENT) { + // Found office document! Search for slides... + $slideRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/_rels/" . basename($rel["Target"]) . ".rels")) ); + foreach ($slideRelations->Relationship as $slideRel) { + if ($slideRel["Type"] == Zend_Search_Lucene_Document_Pptx::SCHEMA_SLIDERELATION) { + // Found slide! + $slides[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string( + $package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . basename($slideRel["Target"])) ) + ); + + // Search for slide notes + $slideNotesRelations = simplexml_load_string($package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/_rels/" . basename($slideRel["Target"]) . ".rels")) ); + foreach ($slideNotesRelations->Relationship as $slideNoteRel) { + if ($slideNoteRel["Type"] == Zend_Search_Lucene_Document_Pptx::SCHEMA_SLIDENOTESRELATION) { + // Found slide notes! + $slideNotes[ str_replace( 'rId', '', (string)$slideRel["Id"] ) ] = simplexml_load_string( + $package->getFromName( $this->absoluteZipPath(dirname($rel["Target"]) . "/" . dirname($slideRel["Target"]) . "/" . 
dirname($slideNoteRel["Target"]) . "/" . basename($slideNoteRel["Target"])) ) + ); + + break; + } + } + } + } + + break; + } + } + + // Sort slides + ksort($slides); + ksort($slideNotes); + + // Extract contents from slides + foreach ($slides as $slideKey => $slide) { + // Register namespaces + $slide->registerXPathNamespace("p", Zend_Search_Lucene_Document_Pptx::SCHEMA_PRESENTATIONML); + $slide->registerXPathNamespace("a", Zend_Search_Lucene_Document_Pptx::SCHEMA_DRAWINGML); + + // Fetch all text + $textElements = $slide->xpath('//a:t'); + foreach ($textElements as $textElement) { + $documentBody[] = (string)$textElement; + } + + // Extract contents from slide notes + if (isset($slideNotes[$slideKey])) { + // Fetch slide note + $slideNote = $slideNotes[$slideKey]; + + // Register namespaces + $slideNote->registerXPathNamespace("p", Zend_Search_Lucene_Document_Pptx::SCHEMA_PRESENTATIONML); + $slideNote->registerXPathNamespace("a", Zend_Search_Lucene_Document_Pptx::SCHEMA_DRAWINGML); + + // Fetch all text + $textElements = $slideNote->xpath('//a:t'); + foreach ($textElements as $textElement) { + $documentBody[] = (string)$textElement; + } + } + } + + // Read core properties + $coreProperties = $this->extractMetaData($package); + + // Close file + $package->close(); + + // Store filename + $this->addField(Zend_Search_Lucene_Field::Text('filename', $fileName, 'UTF-8')); + + // Store contents + if ($storeContent) { + $this->addField(Zend_Search_Lucene_Field::Text('body', implode(' ', $documentBody), 'UTF-8')); + } else { + $this->addField(Zend_Search_Lucene_Field::UnStored('body', implode(' ', $documentBody), 'UTF-8')); + } + + // Store meta data properties + foreach ($coreProperties as $key => $value) + { + $this->addField(Zend_Search_Lucene_Field::Text($key, $value, 'UTF-8')); + } + + // Store title (if not present in meta data) + if (!isset($coreProperties['title'])) + { + $this->addField(Zend_Search_Lucene_Field::Text('title', $fileName, 'UTF-8')); + } + } + + /** 
+     * Build a Pptx Lucene document from a file.
+     *
+     * Static convenience factory: both arguments are forwarded unchanged to
+     * the Zend_Search_Lucene_Document_Pptx constructor and the resulting
+     * object is returned.
+     *
+     * @param string  $fileName     Name of the .pptx file to load
+     * @param boolean $storeContent Forwarded as-is to the constructor (defaults to false)
+     * @return Zend_Search_Lucene_Document_Pptx
+     */
+    public static function loadPptxFile($fileName, $storeContent = false)
+    {
+        $document = new Zend_Search_Lucene_Document_Pptx($fileName, $storeContent);
+
+        return $document;
+    }
+}