web/lib/Zend/Search/Lucene/Document/OpenXml.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Search_Lucene
       
    17  * @subpackage Document
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: OpenXml.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 
       
    24 /** Zend_Search_Lucene_Document */
       
    25 require_once 'Zend/Search/Lucene/Document.php';
       
    26 
       
    27 
       
    28 /**
       
    29  * OpenXML document.
       
    30  *
       
    31  * @category   Zend
       
    32  * @package    Zend_Search_Lucene
       
    33  * @subpackage Document
       
    34  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    35  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    36  */
       
    37 abstract class Zend_Search_Lucene_Document_OpenXml extends Zend_Search_Lucene_Document
       
    38 {
       
    39     /**
       
    40      * Xml Schema - Relationships
       
    41      *
       
    42      * @var string
       
    43      */
       
    44     const SCHEMA_RELATIONSHIP = 'http://schemas.openxmlformats.org/package/2006/relationships';
       
    45 
       
    46     /**
       
    47      * Xml Schema - Office document
       
    48      *
       
    49      * @var string
       
    50      */
       
    51     const SCHEMA_OFFICEDOCUMENT = 'http://schemas.openxmlformats.org/officeDocument/2006/relationships/officeDocument';
       
    52 
       
    53     /**
       
    54      * Xml Schema - Core properties
       
    55      *
       
    56      * @var string
       
    57      */
       
    58     const SCHEMA_COREPROPERTIES = 'http://schemas.openxmlformats.org/package/2006/relationships/metadata/core-properties';
       
    59 
       
    60     /**
       
    61      * Xml Schema - Dublin Core
       
    62      *
       
    63      * @var string
       
    64      */
       
    65     const SCHEMA_DUBLINCORE = 'http://purl.org/dc/elements/1.1/';
       
    66 
       
    67     /**
       
    68      * Xml Schema - Dublin Core Terms
       
    69      *
       
    70      * @var string
       
    71      */
       
    72     const SCHEMA_DUBLINCORETERMS = 'http://purl.org/dc/terms/';
       
    73 
       
    74     /**
       
    75      * Extract metadata from document
       
    76      *
       
    77      * @param ZipArchive $package    ZipArchive OpenXML package
       
    78      * @return array    Key-value pairs containing document meta data
       
    79      */
       
    80     protected function extractMetaData(ZipArchive $package)
       
    81     {
       
    82         // Data holders
       
    83         $coreProperties = array();
       
    84 
       
    85         // Read relations and search for core properties
       
    86         $relations = simplexml_load_string($package->getFromName("_rels/.rels"));
       
    87         foreach ($relations->Relationship as $rel) {
       
    88             if ($rel["Type"] == Zend_Search_Lucene_Document_OpenXml::SCHEMA_COREPROPERTIES) {
       
    89                 // Found core properties! Read in contents...
       
    90                 $contents = simplexml_load_string(
       
    91                     $package->getFromName(dirname($rel["Target"]) . "/" . basename($rel["Target"]))
       
    92                 );
       
    93 
       
    94                 foreach ($contents->children(Zend_Search_Lucene_Document_OpenXml::SCHEMA_DUBLINCORE) as $child) {
       
    95                     $coreProperties[$child->getName()] = (string)$child;
       
    96                 }
       
    97                 foreach ($contents->children(Zend_Search_Lucene_Document_OpenXml::SCHEMA_COREPROPERTIES) as $child) {
       
    98                     $coreProperties[$child->getName()] = (string)$child;
       
    99                 }
       
   100                 foreach ($contents->children(Zend_Search_Lucene_Document_OpenXml::SCHEMA_DUBLINCORETERMS) as $child) {
       
   101                     $coreProperties[$child->getName()] = (string)$child;
       
   102                 }
       
   103             }
       
   104         }
       
   105 
       
   106         return $coreProperties;
       
   107     }
       
   108 
       
   109     /**
       
   110      * Determine absolute zip path
       
   111      *
       
   112      * @param string $path
       
   113      * @return string
       
   114      */
       
   115     protected function absoluteZipPath($path) {
       
   116         $path = str_replace(array('/', '\\'), DIRECTORY_SEPARATOR, $path);
       
   117         $parts = array_filter(explode(DIRECTORY_SEPARATOR, $path), 'strlen');
       
   118         $absolutes = array();
       
   119         foreach ($parts as $part) {
       
   120             if ('.' == $part) continue;
       
   121             if ('..' == $part) {
       
   122                 array_pop($absolutes);
       
   123             } else {
       
   124                 $absolutes[] = $part;
       
   125             }
       
   126         }
       
   127         return implode('/', $absolutes);
       
   128     }
       
   129 }