|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @subpackage Document |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: OpenXml.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
21 */ |
|
22 |
|
23 |
|
24 /** Zend_Search_Lucene_Document */ |
|
25 require_once 'Zend/Search/Lucene/Document.php'; |
|
26 |
|
27 |
|
/**
 * OpenXML document.
 *
 * Base class for Lucene documents built from Office Open XML packages.
 * It provides the helpers shared by the concrete document types: reading the
 * package core properties (metadata) and normalizing part names so they can
 * be looked up in the zip archive.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
{
    /**
     * Xml Schema - Relationships
     *
     * @var string
     */
    const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';

    /**
     * Xml Schema - Office document
     *
     * @var string
     */
    const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';

    /**
     * Xml Schema - Core properties (relationship type)
     *
     * This is the value of the "Type" attribute of the relationship that
     * points at the core properties part. It is NOT the namespace of the
     * elements inside that part; see SCHEMA_COREPROPERTIES_NAMESPACE.
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';

    /**
     * Xml Schema - Core properties (element namespace, usually prefixed "cp")
     *
     * @var string
     */
    const SCHEMA_COREPROPERTIES_NAMESPACE = 'http://schemas.openxmlformats.org/package/2006/metadata/core-properties';

    /**
     * Xml Schema - Dublin Core
     *
     * @var string
     */
    const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';

    /**
     * Xml Schema - Dublin Core Terms
     *
     * @var string
     */
    const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';

    /**
     * Extract metadata from document
     *
     * Reads the package relationships ("_rels/.rels"), follows every
     * relationship of type SCHEMA_COREPROPERTIES and collects the text of the
     * direct children of the referenced part that belong to the Dublin Core,
     * core properties and Dublin Core Terms namespaces.
     *
     * Extraction is best-effort: a relationships part or core properties part
     * that is missing from the archive, or that is not well-formed XML, is
     * skipped and simply contributes no entries.
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @return array    Key-value pairs containing document meta data
     *                  (element local name => element text)
     */
    protected function extractMetaData(ZipArchive $package)
    {
        // Data holders
        $coreProperties = array();

        // NOTE(review): the XML comes from the (possibly untrusted) package
        // and is parsed with libxml defaults - confirm that external entity
        // loading is disabled in the target environment.
        $relations = $this->_loadXmlPart($package, '_rels/.rels');
        if ($relations === false) {
            return $coreProperties;
        }

        // Namespaces are read in this order; on a name collision the later
        // namespace wins. The relationship type URI is kept in the list only
        // for backward compatibility with earlier behaviour.
        $namespaces = array(
            self::SCHEMA_DUBLINCORE,
            self::SCHEMA_COREPROPERTIES,
            self::SCHEMA_COREPROPERTIES_NAMESPACE,
            self::SCHEMA_DUBLINCORETERMS,
        );

        foreach ($relations->Relationship as $rel) {
            if ((string) $rel['Type'] !== self::SCHEMA_COREPROPERTIES) {
                continue;
            }

            // Found core properties! Zip entry names carry no leading slash
            // and no "." / ".." segments, so normalize the target first.
            $partName = $this->absoluteZipPath((string) $rel['Target']);
            $contents = $this->_loadXmlPart($package, $partName);
            if ($contents === false) {
                continue;
            }

            foreach ($namespaces as $namespace) {
                foreach ($contents->children($namespace) as $child) {
                    $coreProperties[$child->getName()] = (string) $child;
                }
            }
        }

        return $coreProperties;
    }

    /**
     * Determine absolute zip path
     *
     * Accepts both "/" and "\" as separators, drops empty and "." segments
     * and resolves ".." against the preceding segment. A ".." with nothing
     * left to remove is ignored. The result is joined with "/" and has no
     * leading or trailing separator.
     *
     * @param string $path
     * @return string
     */
    protected function absoluteZipPath($path)
    {
        $path     = str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $path);
        $segments = array_filter(explode(DIRECTORY_SEPARATOR, $path), 'strlen');

        $resolved = array();
        foreach ($segments as $segment) {
            if ($segment === '.') {
                continue;
            }

            if ($segment === '..') {
                array_pop($resolved);
                continue;
            }

            $resolved[] = $segment;
        }

        return implode('/', $resolved);
    }

    /**
     * Read a part from the package and parse it as XML
     *
     * @param ZipArchive $package    ZipArchive OpenXML package
     * @param string     $partName   Name of the zip entry to read
     * @return SimpleXMLElement|false    Parsed part, or false when the entry
     *                                   cannot be read or parsed
     */
    private function _loadXmlPart(ZipArchive $package, $partName)
    {
        $xml = $package->getFromName($partName);
        if ($xml === false) {
            return false;
        }

        return simplexml_load_string($xml);
    }
}