web/Zend/Search/Lucene.php
changeset 0 4eba9c11703f
equal deleted inserted replaced
-1:000000000000 0:4eba9c11703f
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Search_Lucene
       
    17  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    18  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    19  * @version    $Id: Lucene.php 22987 2010-09-21 10:39:53Z alexander $
       
    20  */
       
    21 
       
    22 
       
    23 /** User land classes and interfaces turned on by Zend/Search/Lucene.php file inclusion. */
       
    24 /** @todo Section should be removed with ZF 2.0 release as obsolete                      */
       
    25 
       
    26 /** Zend_Search_Lucene_Document_Html */
       
    27 require_once 'Zend/Search/Lucene/Document/Html.php';
       
    28 
       
    29 /** Zend_Search_Lucene_Document_Docx */
       
    30 require_once 'Zend/Search/Lucene/Document/Docx.php';
       
    31 
       
    32 /** Zend_Search_Lucene_Document_Pptx */
       
    33 require_once 'Zend/Search/Lucene/Document/Pptx.php';
       
    34 
       
    35 /** Zend_Search_Lucene_Document_Xlsx */
       
    36 require_once 'Zend/Search/Lucene/Document/Xlsx.php';
       
    37 
       
    38 /** Zend_Search_Lucene_Search_QueryParser */
       
    39 require_once 'Zend/Search/Lucene/Search/QueryParser.php';
       
    40 
       
    41 /** Zend_Search_Lucene_Search_QueryHit */
       
    42 require_once 'Zend/Search/Lucene/Search/QueryHit.php';
       
    43 
       
    44 /** Zend_Search_Lucene_Analysis_Analyzer */
       
    45 require_once 'Zend/Search/Lucene/Analysis/Analyzer.php';
       
    46 
       
    47 /** Zend_Search_Lucene_Search_Query_Term */
       
    48 require_once 'Zend/Search/Lucene/Search/Query/Term.php';
       
    49 
       
    50 /** Zend_Search_Lucene_Search_Query_Phrase */
       
    51 require_once 'Zend/Search/Lucene/Search/Query/Phrase.php';
       
    52 
       
    53 /** Zend_Search_Lucene_Search_Query_MultiTerm */
       
    54 require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php';
       
    55 
       
    56 /** Zend_Search_Lucene_Search_Query_Wildcard */
       
    57 require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php';
       
    58 
       
    59 /** Zend_Search_Lucene_Search_Query_Range */
       
    60 require_once 'Zend/Search/Lucene/Search/Query/Range.php';
       
    61 
       
    62 /** Zend_Search_Lucene_Search_Query_Fuzzy */
       
    63 require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php';
       
    64 
       
    65 /** Zend_Search_Lucene_Search_Query_Boolean */
       
    66 require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
       
    67 
       
    68 /** Zend_Search_Lucene_Search_Query_Empty */
       
    69 require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
       
    70 
       
    71 /** Zend_Search_Lucene_Search_Query_Insignificant */
       
    72 require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php';
       
    73 
       
    74 
       
    75 
       
    76 
       
    77 /** Internally used classes */
       
    78 
       
    79 /** Zend_Search_Lucene_Interface */
       
    80 require_once 'Zend/Search/Lucene/Interface.php';
       
    81 
       
    82 /** Zend_Search_Lucene_Index_SegmentInfo */
       
    83 require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
       
    84 
       
    85 /** Zend_Search_Lucene_LockManager */
       
    86 require_once 'Zend/Search/Lucene/LockManager.php';
       
    87 
       
    88 
       
    89 /**
       
    90  * @category   Zend
       
    91  * @package    Zend_Search_Lucene
       
    92  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    93  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    94  */
       
    95 class Zend_Search_Lucene implements Zend_Search_Lucene_Interface
       
    96 {
       
    97     /**
       
    98      * Default field name for search
       
    99      *
       
   100      * Null means search through all fields
       
   101      *
       
   102      * @var string
       
   103      */
       
   104     private static $_defaultSearchField = null;
       
   105 
       
   106     /**
       
   107      * Result set limit
       
   108      *
       
   109      * 0 means no limit
       
   110      *
       
   111      * @var integer
       
   112      */
       
   113     private static $_resultSetLimit = 0;
       
   114 
       
   115     /**
       
   116      * Terms per query limit
       
   117      *
       
   118      * 0 means no limit
       
   119      *
       
   120      * @var integer
       
   121      */
       
   122     private static $_termsPerQueryLimit = 1024;
       
   123 
       
   124     /**
       
   125      * File system adapter.
       
   126      *
       
   127      * @var Zend_Search_Lucene_Storage_Directory
       
   128      */
       
   129     private $_directory = null;
       
   130 
       
   131     /**
       
   132      * File system adapter closing option
       
   133      *
       
   134      * @var boolean
       
   135      */
       
   136     private $_closeDirOnExit = true;
       
   137 
       
   138     /**
       
   139      * Writer for this index, not instantiated unless required.
       
   140      *
       
   141      * @var Zend_Search_Lucene_Index_Writer
       
   142      */
       
   143     private $_writer = null;
       
   144 
       
   145     /**
       
   146      * Array of Zend_Search_Lucene_Index_SegmentInfo objects for current version of index.
       
   147      *
       
   148      * @var array Zend_Search_Lucene_Index_SegmentInfo
       
   149      */
       
   150     private $_segmentInfos = array();
       
   151 
       
   152     /**
       
   153      * Number of documents in this index.
       
   154      *
       
   155      * @var integer
       
   156      */
       
   157     private $_docCount = 0;
       
   158 
       
   159     /**
       
   160      * Flag for index changes
       
   161      *
       
   162      * @var boolean
       
   163      */
       
   164     private $_hasChanges = false;
       
   165 
       
   166 
       
   167     /**
       
   168      * Signal, that index is already closed, changes are fixed and resources are cleaned up
       
   169      *
       
   170      * @var boolean
       
   171      */
       
   172     private $_closed = false;
       
   173 
       
   174     /**
       
   175      * Number of references to the index object
       
   176      *
       
   177      * @var integer
       
   178      */
       
   179     private $_refCount = 0;
       
   180 
       
   181     /**
       
   182      * Current segment generation
       
   183      *
       
   184      * @var integer
       
   185      */
       
   186     private $_generation;
       
   187 
       
   188     const FORMAT_PRE_2_1 = 0;
       
   189     const FORMAT_2_1     = 1;
       
   190     const FORMAT_2_3     = 2;
       
   191 
       
   192 
       
   193     /**
       
   194      * Index format version
       
   195      *
       
   196      * @var integer
       
   197      */
       
   198     private $_formatVersion;
       
   199 
       
   200     /**
       
   201      * Create index
       
   202      *
       
   203      * @param mixed $directory
       
   204      * @return Zend_Search_Lucene_Interface
       
   205      */
       
   206     public static function create($directory)
       
   207     {
       
   208         /** Zend_Search_Lucene_Proxy */
       
   209         require_once 'Zend/Search/Lucene/Proxy.php';
       
   210 
       
   211         return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, true));
       
   212     }
       
   213 
       
   214     /**
       
   215      * Open index
       
   216      *
       
   217      * @param mixed $directory
       
   218      * @return Zend_Search_Lucene_Interface
       
   219      */
       
   220     public static function open($directory)
       
   221     {
       
   222         /** Zend_Search_Lucene_Proxy */
       
   223         require_once 'Zend/Search/Lucene/Proxy.php';
       
   224 
       
   225         return new Zend_Search_Lucene_Proxy(new Zend_Search_Lucene($directory, false));
       
   226     }
       
   227 
       
   228     /** Generation retrieving counter */
       
   229     const GENERATION_RETRIEVE_COUNT = 10;
       
   230 
       
   231     /** Pause between generation retrieving attempts in milliseconds */
       
   232     const GENERATION_RETRIEVE_PAUSE = 50;
       
   233 
       
   234     /**
       
   235      * Get current generation number
       
   236      *
       
   237      * Returns generation number
       
   238      * 0 means pre-2.1 index format
       
   239      * -1 means there are no segments files.
       
   240      *
       
   241      * @param Zend_Search_Lucene_Storage_Directory $directory
       
   242      * @return integer
       
   243      * @throws Zend_Search_Lucene_Exception
       
   244      */
       
   245     public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
       
   246     {
       
   247         /**
       
   248          * Zend_Search_Lucene uses segments.gen file to retrieve current generation number
       
   249          *
       
   250          * Apache Lucene index format documentation mentions this method only as a fallback method
       
   251          *
       
   252          * Nevertheless we use it according to the performance considerations
       
   253          *
       
   254          * @todo check if we can use some modification of Apache Lucene generation determination algorithm
       
   255          *       without performance problems
       
   256          */
       
   257 
       
   258         require_once 'Zend/Search/Lucene/Exception.php';
       
   259         try {
       
   260             for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
       
   261                 // Try to get generation file
       
   262                 $genFile = $directory->getFileObject('segments.gen', false);
       
   263 
       
   264                 $format = $genFile->readInt();
       
   265                 if ($format != (int)0xFFFFFFFE) {
       
   266                     throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
       
   267                 }
       
   268 
       
   269                 $gen1 = $genFile->readLong();
       
   270                 $gen2 = $genFile->readLong();
       
   271 
       
   272                 if ($gen1 == $gen2) {
       
   273                     return $gen1;
       
   274                 }
       
   275 
       
   276                 usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
       
   277             }
       
   278 
       
   279             // All passes are failed
       
   280             throw new Zend_Search_Lucene_Exception('Index is under processing now');
       
   281         } catch (Zend_Search_Lucene_Exception $e) {
       
   282             if (strpos($e->getMessage(), 'is not readable') !== false) {
       
   283                 try {
       
   284                     // Try to open old style segments file
       
   285                     $segmentsFile = $directory->getFileObject('segments', false);
       
   286 
       
   287                     // It's pre-2.1 index
       
   288                     return 0;
       
   289                 } catch (Zend_Search_Lucene_Exception $e) {
       
   290                     if (strpos($e->getMessage(), 'is not readable') !== false) {
       
   291                         return -1;
       
   292                     } else {
       
   293                         throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
   294                     }
       
   295                 }
       
   296             } else {
       
   297                 throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
   298             }
       
   299         }
       
   300 
       
   301         return -1;
       
   302     }
       
   303 
       
   304     /**
       
   305      * Get generation number associated with this index instance
       
   306      *
       
   307      * The same generation number in pair with document number or query string
       
   308      * guarantees to give the same result while index retrieving.
       
   309      * So it may be used for search result caching.
       
   310      *
       
   311      * @return integer
       
   312      */
       
   313     public function getGeneration()
       
   314     {
       
   315         return $this->_generation;
       
   316     }
       
   317 
       
   318 
       
   319     /**
       
   320      * Get segments file name
       
   321      *
       
   322      * @param integer $generation
       
   323      * @return string
       
   324      */
       
   325     public static function getSegmentFileName($generation)
       
   326     {
       
   327         if ($generation == 0) {
       
   328             return 'segments';
       
   329         }
       
   330 
       
   331         return 'segments_' . base_convert($generation, 10, 36);
       
   332     }
       
   333 
       
   334     /**
       
   335      * Get index format version
       
   336      *
       
   337      * @return integer
       
   338      */
       
   339     public function getFormatVersion()
       
   340     {
       
   341         return $this->_formatVersion;
       
   342     }
       
   343 
       
   344     /**
       
   345      * Set index format version.
       
   346      * Index is converted to this format at the nearest upfdate time
       
   347      *
       
   348      * @param int $formatVersion
       
   349      * @throws Zend_Search_Lucene_Exception
       
   350      */
       
   351     public function setFormatVersion($formatVersion)
       
   352     {
       
   353         if ($formatVersion != self::FORMAT_PRE_2_1  &&
       
   354             $formatVersion != self::FORMAT_2_1  &&
       
   355             $formatVersion != self::FORMAT_2_3) {
       
   356             require_once 'Zend/Search/Lucene/Exception.php';
       
   357             throw new Zend_Search_Lucene_Exception('Unsupported index format');
       
   358         }
       
   359 
       
   360         $this->_formatVersion = $formatVersion;
       
   361     }
       
   362 
       
   363     /**
       
   364      * Read segments file for pre-2.1 Lucene index format
       
   365      *
       
   366      * @throws Zend_Search_Lucene_Exception
       
   367      */
       
   368     private function _readPre21SegmentsFile()
       
   369     {
       
   370         $segmentsFile = $this->_directory->getFileObject('segments');
       
   371 
       
   372         $format = $segmentsFile->readInt();
       
   373 
       
   374         if ($format != (int)0xFFFFFFFF) {
       
   375             require_once 'Zend/Search/Lucene/Exception.php';
       
   376             throw new Zend_Search_Lucene_Exception('Wrong segments file format');
       
   377         }
       
   378 
       
   379         // read version
       
   380         $segmentsFile->readLong();
       
   381 
       
   382         // read segment name counter
       
   383         $segmentsFile->readInt();
       
   384 
       
   385         $segments = $segmentsFile->readInt();
       
   386 
       
   387         $this->_docCount = 0;
       
   388 
       
   389         // read segmentInfos
       
   390         for ($count = 0; $count < $segments; $count++) {
       
   391             $segName = $segmentsFile->readString();
       
   392             $segSize = $segmentsFile->readInt();
       
   393             $this->_docCount += $segSize;
       
   394 
       
   395             $this->_segmentInfos[$segName] =
       
   396                                 new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
       
   397                                                                          $segName,
       
   398                                                                          $segSize);
       
   399         }
       
   400 
       
   401         // Use 2.1 as a target version. Index will be reorganized at update time.
       
   402         $this->_formatVersion = self::FORMAT_2_1;
       
   403     }
       
   404 
       
   405     /**
       
   406      * Read segments file
       
   407      *
       
   408      * @throws Zend_Search_Lucene_Exception
       
   409      */
       
   410     private function _readSegmentsFile()
       
   411     {
       
   412         $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
       
   413 
       
   414         $format = $segmentsFile->readInt();
       
   415 
       
   416         if ($format == (int)0xFFFFFFFC) {
       
   417             $this->_formatVersion = self::FORMAT_2_3;
       
   418         } else if ($format == (int)0xFFFFFFFD) {
       
   419             $this->_formatVersion = self::FORMAT_2_1;
       
   420         } else {
       
   421             require_once 'Zend/Search/Lucene/Exception.php';
       
   422             throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
       
   423         }
       
   424 
       
   425         // read version
       
   426         $segmentsFile->readLong();
       
   427 
       
   428         // read segment name counter
       
   429         $segmentsFile->readInt();
       
   430 
       
   431         $segments = $segmentsFile->readInt();
       
   432 
       
   433         $this->_docCount = 0;
       
   434 
       
   435         // read segmentInfos
       
   436         for ($count = 0; $count < $segments; $count++) {
       
   437             $segName = $segmentsFile->readString();
       
   438             $segSize = $segmentsFile->readInt();
       
   439 
       
   440             // 2.1+ specific properties
       
   441             $delGen = $segmentsFile->readLong();
       
   442 
       
   443             if ($this->_formatVersion == self::FORMAT_2_3) {
       
   444                 $docStoreOffset = $segmentsFile->readInt();
       
   445 
       
   446                 if ($docStoreOffset != (int)0xFFFFFFFF) {
       
   447                     $docStoreSegment        = $segmentsFile->readString();
       
   448                     $docStoreIsCompoundFile = $segmentsFile->readByte();
       
   449 
       
   450                     $docStoreOptions = array('offset'     => $docStoreOffset,
       
   451                                              'segment'    => $docStoreSegment,
       
   452                                              'isCompound' => ($docStoreIsCompoundFile == 1));
       
   453                 } else {
       
   454                     $docStoreOptions = null;
       
   455                 }
       
   456             } else {
       
   457                 $docStoreOptions = null;
       
   458             }
       
   459 
       
   460             $hasSingleNormFile = $segmentsFile->readByte();
       
   461             $numField          = $segmentsFile->readInt();
       
   462 
       
   463             $normGens = array();
       
   464             if ($numField != (int)0xFFFFFFFF) {
       
   465                 for ($count1 = 0; $count1 < $numField; $count1++) {
       
   466                     $normGens[] = $segmentsFile->readLong();
       
   467                 }
       
   468 
       
   469                 require_once 'Zend/Search/Lucene/Exception.php';
       
   470                 throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
       
   471             }
       
   472 
       
   473             $isCompoundByte     = $segmentsFile->readByte();
       
   474 
       
   475             if ($isCompoundByte == 0xFF) {
       
   476                 // The segment is not a compound file
       
   477                 $isCompound = false;
       
   478             } else if ($isCompoundByte == 0x00) {
       
   479                 // The status is unknown
       
   480                 $isCompound = null;
       
   481             } else if ($isCompoundByte == 0x01) {
       
   482                 // The segment is a compound file
       
   483                 $isCompound = true;
       
   484             }
       
   485 
       
   486             $this->_docCount += $segSize;
       
   487 
       
   488             $this->_segmentInfos[$segName] =
       
   489                                 new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
       
   490                                                                          $segName,
       
   491                                                                          $segSize,
       
   492                                                                          $delGen,
       
   493                                                                          $docStoreOptions,
       
   494                                                                          $hasSingleNormFile,
       
   495                                                                          $isCompound);
       
   496         }
       
   497     }
       
   498 
       
   499     /**
       
   500      * Opens the index.
       
   501      *
       
   502      * IndexReader constructor needs Directory as a parameter. It should be
       
   503      * a string with a path to the index folder or a Directory object.
       
   504      *
       
   505      * @param Zend_Search_Lucene_Storage_Directory_Filesystem|string $directory
       
   506      * @throws Zend_Search_Lucene_Exception
       
   507      */
       
   508     public function __construct($directory = null, $create = false)
       
   509     {
       
   510         if ($directory === null) {
       
   511             require_once 'Zend/Search/Lucene/Exception.php';
       
   512             throw new Zend_Search_Exception('No index directory specified');
       
   513         }
       
   514 
       
   515         if (is_string($directory)) {
       
   516             require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
       
   517             $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
       
   518             $this->_closeDirOnExit = true;
       
   519         } else {
       
   520             $this->_directory      = $directory;
       
   521             $this->_closeDirOnExit = false;
       
   522         }
       
   523 
       
   524         $this->_segmentInfos = array();
       
   525 
       
   526         // Mark index as "under processing" to prevent other processes from premature index cleaning
       
   527         Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);
       
   528 
       
   529         $this->_generation = self::getActualGeneration($this->_directory);
       
   530 
       
   531         if ($create) {
       
   532             require_once 'Zend/Search/Lucene/Exception.php';
       
   533             try {
       
   534                 Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
       
   535             } catch (Zend_Search_Lucene_Exception $e) {
       
   536                 Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
       
   537 
       
   538                 if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
       
   539                     throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
   540                 } else {
       
   541                     throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now', 0, $e);
       
   542                 }
       
   543             }
       
   544 
       
   545             if ($this->_generation == -1) {
       
   546                 // Directory doesn't contain existing index, start from 1
       
   547                 $this->_generation = 1;
       
   548                 $nameCounter = 0;
       
   549             } else {
       
   550                 // Directory contains existing index
       
   551                 $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
       
   552                 $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
       
   553 
       
   554                 $nameCounter = $segmentsFile->readInt();
       
   555                 $this->_generation++;
       
   556             }
       
   557 
       
   558             require_once 'Zend/Search/Lucene/Index/Writer.php';
       
   559             Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);
       
   560 
       
   561             Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
       
   562         }
       
   563 
       
   564         if ($this->_generation == -1) {
       
   565             require_once 'Zend/Search/Lucene/Exception.php';
       
   566             throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
       
   567         } else if ($this->_generation == 0) {
       
   568             $this->_readPre21SegmentsFile();
       
   569         } else {
       
   570             $this->_readSegmentsFile();
       
   571         }
       
   572     }
       
   573 
       
   574     /**
       
   575      * Close current index and free resources
       
   576      */
       
   577     private function _close()
       
   578     {
       
   579         if ($this->_closed) {
       
   580             // index is already closed and resources are cleaned up
       
   581             return;
       
   582         }
       
   583 
       
   584         $this->commit();
       
   585 
       
   586         // Release "under processing" flag
       
   587         Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);
       
   588 
       
   589         if ($this->_closeDirOnExit) {
       
   590             $this->_directory->close();
       
   591         }
       
   592 
       
   593         $this->_directory    = null;
       
   594         $this->_writer       = null;
       
   595         $this->_segmentInfos = null;
       
   596 
       
   597         $this->_closed = true;
       
   598     }
       
   599 
       
   600     /**
       
   601      * Add reference to the index object
       
   602      *
       
   603      * @internal
       
   604      */
       
   605     public function addReference()
       
   606     {
       
   607         $this->_refCount++;
       
   608     }
       
   609 
       
   610     /**
       
   611      * Remove reference from the index object
       
   612      *
       
   613      * When reference count becomes zero, index is closed and resources are cleaned up
       
   614      *
       
   615      * @internal
       
   616      */
       
   617     public function removeReference()
       
   618     {
       
   619         $this->_refCount--;
       
   620 
       
   621         if ($this->_refCount == 0) {
       
   622             $this->_close();
       
   623         }
       
   624     }
       
   625 
       
   626     /**
       
   627      * Object destructor
       
   628      */
       
   629     public function __destruct()
       
   630     {
       
   631         $this->_close();
       
   632     }
       
   633 
       
   634     /**
       
   635      * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
       
   636      *
       
   637      * @return Zend_Search_Lucene_Index_Writer
       
   638      */
       
   639     private function _getIndexWriter()
       
   640     {
       
   641         if ($this->_writer === null) {
       
   642             require_once 'Zend/Search/Lucene/Index/Writer.php';
       
   643             $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory,
       
   644                                                                  $this->_segmentInfos,
       
   645                                                                  $this->_formatVersion);
       
   646         }
       
   647 
       
   648         return $this->_writer;
       
   649     }
       
   650 
       
   651 
       
   652     /**
       
   653      * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
       
   654      *
       
   655      * @return Zend_Search_Lucene_Storage_Directory
       
   656      */
       
   657     public function getDirectory()
       
   658     {
       
   659         return $this->_directory;
       
   660     }
       
   661 
       
   662 
       
   663     /**
       
   664      * Returns the total number of documents in this index (including deleted documents).
       
   665      *
       
   666      * @return integer
       
   667      */
       
   668     public function count()
       
   669     {
       
   670         return $this->_docCount;
       
   671     }
       
   672 
       
   673     /**
       
   674      * Returns one greater than the largest possible document number.
       
   675      * This may be used to, e.g., determine how big to allocate a structure which will have
       
   676      * an element for every document number in an index.
       
   677      *
       
   678      * @return integer
       
   679      */
       
   680     public function maxDoc()
       
   681     {
       
   682         return $this->count();
       
   683     }
       
   684 
       
   685     /**
       
   686      * Returns the total number of non-deleted documents in this index.
       
   687      *
       
   688      * @return integer
       
   689      */
       
   690     public function numDocs()
       
   691     {
       
   692         $numDocs = 0;
       
   693 
       
   694         foreach ($this->_segmentInfos as $segmentInfo) {
       
   695             $numDocs += $segmentInfo->numDocs();
       
   696         }
       
   697 
       
   698         return $numDocs;
       
   699     }
       
   700 
       
   701     /**
       
   702      * Checks, that document is deleted
       
   703      *
       
   704      * @param integer $id
       
   705      * @return boolean
       
   706      * @throws Zend_Search_Lucene_Exception    Exception is thrown if $id is out of the range
       
   707      */
       
   708     public function isDeleted($id)
       
   709     {
       
   710         $this->commit();
       
   711 
       
   712         if ($id >= $this->_docCount) {
       
   713             require_once 'Zend/Search/Lucene/Exception.php';
       
   714             throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
       
   715         }
       
   716 
       
   717         $segmentStartId = 0;
       
   718         foreach ($this->_segmentInfos as $segmentInfo) {
       
   719             if ($segmentStartId + $segmentInfo->count() > $id) {
       
   720                 break;
       
   721             }
       
   722 
       
   723             $segmentStartId += $segmentInfo->count();
       
   724         }
       
   725 
       
   726         return $segmentInfo->isDeleted($id - $segmentStartId);
       
   727     }
       
   728 
       
   729     /**
       
   730      * Set default search field.
       
   731      *
       
   732      * Null means, that search is performed through all fields by default
       
   733      *
       
   734      * Default value is null
       
   735      *
       
   736      * @param string $fieldName
       
   737      */
       
   738     public static function setDefaultSearchField($fieldName)
       
   739     {
       
   740         self::$_defaultSearchField = $fieldName;
       
   741     }
       
   742 
       
   743     /**
       
   744      * Get default search field.
       
   745      *
       
   746      * Null means, that search is performed through all fields by default
       
   747      *
       
   748      * @return string
       
   749      */
       
   750     public static function getDefaultSearchField()
       
   751     {
       
   752         return self::$_defaultSearchField;
       
   753     }
       
   754 
       
   755     /**
       
   756      * Set result set limit.
       
   757      *
       
   758      * 0 (default) means no limit
       
   759      *
       
   760      * @param integer $limit
       
   761      */
       
   762     public static function setResultSetLimit($limit)
       
   763     {
       
   764         self::$_resultSetLimit = $limit;
       
   765     }
       
   766 
       
   767     /**
       
   768      * Get result set limit.
       
   769      *
       
   770      * 0 means no limit
       
   771      *
       
   772      * @return integer
       
   773      */
       
   774     public static function getResultSetLimit()
       
   775     {
       
   776         return self::$_resultSetLimit;
       
   777     }
       
   778 
       
   779     /**
       
   780      * Set terms per query limit.
       
   781      *
       
   782      * 0 means no limit
       
   783      *
       
   784      * @param integer $limit
       
   785      */
       
   786     public static function setTermsPerQueryLimit($limit)
       
   787     {
       
   788         self::$_termsPerQueryLimit = $limit;
       
   789     }
       
   790 
       
   791     /**
       
   792      * Get terms per query limit.
       
   793      *
       
   794      * 0 (default) means no limit
       
   795      *
       
   796      * @return integer
       
   797      */
       
   798     public static function getTermsPerQueryLimit()
       
   799     {
       
   800         return self::$_termsPerQueryLimit;
       
   801     }
       
   802 
       
   803     /**
       
   804      * Retrieve index maxBufferedDocs option
       
   805      *
       
   806      * maxBufferedDocs is a minimal number of documents required before
       
   807      * the buffered in-memory documents are written into a new Segment
       
   808      *
       
   809      * Default value is 10
       
   810      *
       
   811      * @return integer
       
   812      */
       
   813     public function getMaxBufferedDocs()
       
   814     {
       
   815         return $this->_getIndexWriter()->maxBufferedDocs;
       
   816     }
       
   817 
       
   818     /**
       
   819      * Set index maxBufferedDocs option
       
   820      *
       
   821      * maxBufferedDocs is a minimal number of documents required before
       
   822      * the buffered in-memory documents are written into a new Segment
       
   823      *
       
   824      * Default value is 10
       
   825      *
       
   826      * @param integer $maxBufferedDocs
       
   827      */
       
   828     public function setMaxBufferedDocs($maxBufferedDocs)
       
   829     {
       
   830         $this->_getIndexWriter()->maxBufferedDocs = $maxBufferedDocs;
       
   831     }
       
   832 
       
   833     /**
       
   834      * Retrieve index maxMergeDocs option
       
   835      *
       
   836      * maxMergeDocs is a largest number of documents ever merged by addDocument().
       
   837      * Small values (e.g., less than 10,000) are best for interactive indexing,
       
   838      * as this limits the length of pauses while indexing to a few seconds.
       
   839      * Larger values are best for batched indexing and speedier searches.
       
   840      *
       
   841      * Default value is PHP_INT_MAX
       
   842      *
       
   843      * @return integer
       
   844      */
       
   845     public function getMaxMergeDocs()
       
   846     {
       
   847         return $this->_getIndexWriter()->maxMergeDocs;
       
   848     }
       
   849 
       
   850     /**
       
   851      * Set index maxMergeDocs option
       
   852      *
       
   853      * maxMergeDocs is a largest number of documents ever merged by addDocument().
       
   854      * Small values (e.g., less than 10,000) are best for interactive indexing,
       
   855      * as this limits the length of pauses while indexing to a few seconds.
       
   856      * Larger values are best for batched indexing and speedier searches.
       
   857      *
       
   858      * Default value is PHP_INT_MAX
       
   859      *
       
   860      * @param integer $maxMergeDocs
       
   861      */
       
   862     public function setMaxMergeDocs($maxMergeDocs)
       
   863     {
       
   864         $this->_getIndexWriter()->maxMergeDocs = $maxMergeDocs;
       
   865     }
       
   866 
       
   867     /**
       
   868      * Retrieve index mergeFactor option
       
   869      *
       
   870      * mergeFactor determines how often segment indices are merged by addDocument().
       
   871      * With smaller values, less RAM is used while indexing,
       
   872      * and searches on unoptimized indices are faster,
       
   873      * but indexing speed is slower.
       
   874      * With larger values, more RAM is used during indexing,
       
   875      * and while searches on unoptimized indices are slower,
       
   876      * indexing is faster.
       
   877      * Thus larger values (> 10) are best for batch index creation,
       
   878      * and smaller values (< 10) for indices that are interactively maintained.
       
   879      *
       
   880      * Default value is 10
       
   881      *
       
   882      * @return integer
       
   883      */
       
   884     public function getMergeFactor()
       
   885     {
       
   886         return $this->_getIndexWriter()->mergeFactor;
       
   887     }
       
   888 
       
   889     /**
       
   890      * Set index mergeFactor option
       
   891      *
       
   892      * mergeFactor determines how often segment indices are merged by addDocument().
       
   893      * With smaller values, less RAM is used while indexing,
       
   894      * and searches on unoptimized indices are faster,
       
   895      * but indexing speed is slower.
       
   896      * With larger values, more RAM is used during indexing,
       
   897      * and while searches on unoptimized indices are slower,
       
   898      * indexing is faster.
       
   899      * Thus larger values (> 10) are best for batch index creation,
       
   900      * and smaller values (< 10) for indices that are interactively maintained.
       
   901      *
       
   902      * Default value is 10
       
   903      *
       
   904      * @param integer $mergeFactor
       
   905      */
       
   906     public function setMergeFactor($mergeFactor)
       
   907     {
       
   908         $this->_getIndexWriter()->mergeFactor = $mergeFactor;
       
   909     }
       
   910 
       
   911     /**
       
   912      * Performs a query against the index and returns an array
       
   913      * of Zend_Search_Lucene_Search_QueryHit objects.
       
   914      * Input is a string or Zend_Search_Lucene_Search_Query.
       
   915      *
       
   916      * @param Zend_Search_Lucene_Search_Query|string $query
       
   917      * @return array Zend_Search_Lucene_Search_QueryHit
       
   918      * @throws Zend_Search_Lucene_Exception
       
   919      */
       
   920     public function find($query)
       
   921     {
       
   922         if (is_string($query)) {
       
   923             require_once 'Zend/Search/Lucene/Search/QueryParser.php';
       
   924 
       
   925             $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
       
   926         }
       
   927 
       
   928         if (!$query instanceof Zend_Search_Lucene_Search_Query) {
       
   929             require_once 'Zend/Search/Lucene/Exception.php';
       
   930             throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
       
   931         }
       
   932 
       
   933         $this->commit();
       
   934 
       
   935         $hits   = array();
       
   936         $scores = array();
       
   937         $ids    = array();
       
   938 
       
   939         $query = $query->rewrite($this)->optimize($this);
       
   940 
       
   941         $query->execute($this);
       
   942 
       
   943         $topScore = 0;
       
   944 
       
   945         /** Zend_Search_Lucene_Search_QueryHit */
       
   946         require_once 'Zend/Search/Lucene/Search/QueryHit.php';
       
   947 
       
   948         foreach ($query->matchedDocs() as $id => $num) {
       
   949             $docScore = $query->score($id, $this);
       
   950             if( $docScore != 0 ) {
       
   951                 $hit = new Zend_Search_Lucene_Search_QueryHit($this);
       
   952                 $hit->id = $id;
       
   953                 $hit->score = $docScore;
       
   954 
       
   955                 $hits[]   = $hit;
       
   956                 $ids[]    = $id;
       
   957                 $scores[] = $docScore;
       
   958 
       
   959                 if ($docScore > $topScore) {
       
   960                     $topScore = $docScore;
       
   961                 }
       
   962             }
       
   963 
       
   964             if (self::$_resultSetLimit != 0  &&  count($hits) >= self::$_resultSetLimit) {
       
   965                 break;
       
   966             }
       
   967         }
       
   968 
       
   969         if (count($hits) == 0) {
       
   970             // skip sorting, which may cause a error on empty index
       
   971             return array();
       
   972         }
       
   973 
       
   974         if ($topScore > 1) {
       
   975             foreach ($hits as $hit) {
       
   976                 $hit->score /= $topScore;
       
   977             }
       
   978         }
       
   979 
       
   980         if (func_num_args() == 1) {
       
   981             // sort by scores
       
   982             array_multisort($scores, SORT_DESC, SORT_NUMERIC,
       
   983                             $ids,    SORT_ASC,  SORT_NUMERIC,
       
   984                             $hits);
       
   985         } else {
       
   986             // sort by given field names
       
   987 
       
   988             $argList    = func_get_args();
       
   989             $fieldNames = $this->getFieldNames();
       
   990             $sortArgs   = array();
       
   991 
       
   992             // PHP 5.3 now expects all arguments to array_multisort be passed by
       
   993             // reference (if it's invoked through call_user_func_array());
       
   994             // since constants can't be passed by reference, create some placeholder variables.
       
   995             $sortReg    = SORT_REGULAR;
       
   996             $sortAsc    = SORT_ASC;
       
   997             $sortNum    = SORT_NUMERIC;
       
   998 
       
   999             $sortFieldValues = array();
       
  1000 
       
  1001             require_once 'Zend/Search/Lucene/Exception.php';
       
  1002             for ($count = 1; $count < count($argList); $count++) {
       
  1003                 $fieldName = $argList[$count];
       
  1004 
       
  1005                 if (!is_string($fieldName)) {
       
  1006                     throw new Zend_Search_Lucene_Exception('Field name must be a string.');
       
  1007                 }
       
  1008 
       
  1009                 if (strtolower($fieldName) == 'score') {
       
  1010                     $sortArgs[] = &$scores;
       
  1011                 } else {
       
  1012                     if (!in_array($fieldName, $fieldNames)) {
       
  1013                         throw new Zend_Search_Lucene_Exception('Wrong field name.');
       
  1014                     }
       
  1015 
       
  1016                     if (!isset($sortFieldValues[$fieldName])) {
       
  1017                         $valuesArray = array();
       
  1018                         foreach ($hits as $hit) {
       
  1019                             try {
       
  1020                                 $value = $hit->getDocument()->getFieldValue($fieldName);
       
  1021                             } catch (Zend_Search_Lucene_Exception $e) {
       
  1022                                 if (strpos($e->getMessage(), 'not found') === false) {
       
  1023                                     throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
  1024                                 } else {
       
  1025                                     $value = null;
       
  1026                                 }
       
  1027                             }
       
  1028 
       
  1029                             $valuesArray[] = $value;
       
  1030                         }
       
  1031 
       
  1032                         // Collect loaded values in $sortFieldValues
       
  1033                         // Required for PHP 5.3 which translates references into values when source
       
  1034                         // variable is destroyed
       
  1035                         $sortFieldValues[$fieldName] = $valuesArray;
       
  1036                     }
       
  1037 
       
  1038                     $sortArgs[] = &$sortFieldValues[$fieldName];
       
  1039                 }
       
  1040 
       
  1041                 if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
       
  1042                     $count++;
       
  1043                     $sortArgs[] = &$argList[$count];
       
  1044 
       
  1045                     if ($count + 1 < count($argList)  &&  is_integer($argList[$count+1])) {
       
  1046                         $count++;
       
  1047                         $sortArgs[] = &$argList[$count];
       
  1048                     } else {
       
  1049                         if ($argList[$count] == SORT_ASC  || $argList[$count] == SORT_DESC) {
       
  1050                             $sortArgs[] = &$sortReg;
       
  1051                         } else {
       
  1052                             $sortArgs[] = &$sortAsc;
       
  1053                         }
       
  1054                     }
       
  1055                 } else {
       
  1056                     $sortArgs[] = &$sortAsc;
       
  1057                     $sortArgs[] = &$sortReg;
       
  1058                 }
       
  1059             }
       
  1060 
       
  1061             // Sort by id's if values are equal
       
  1062             $sortArgs[] = &$ids;
       
  1063             $sortArgs[] = &$sortAsc;
       
  1064             $sortArgs[] = &$sortNum;
       
  1065 
       
  1066             // Array to be sorted
       
  1067             $sortArgs[] = &$hits;
       
  1068 
       
  1069             // Do sort
       
  1070             call_user_func_array('array_multisort', $sortArgs);
       
  1071         }
       
  1072 
       
  1073         return $hits;
       
  1074     }
       
  1075 
       
  1076 
       
  1077     /**
       
  1078      * Returns a list of all unique field names that exist in this index.
       
  1079      *
       
  1080      * @param boolean $indexed
       
  1081      * @return array
       
  1082      */
       
  1083     public function getFieldNames($indexed = false)
       
  1084     {
       
  1085         $result = array();
       
  1086         foreach( $this->_segmentInfos as $segmentInfo ) {
       
  1087             $result = array_merge($result, $segmentInfo->getFields($indexed));
       
  1088         }
       
  1089         return $result;
       
  1090     }
       
  1091 
       
  1092 
       
  1093     /**
       
  1094      * Returns a Zend_Search_Lucene_Document object for the document
       
  1095      * number $id in this index.
       
  1096      *
       
  1097      * @param integer|Zend_Search_Lucene_Search_QueryHit $id
       
  1098      * @return Zend_Search_Lucene_Document
       
  1099      * @throws Zend_Search_Lucene_Exception    Exception is thrown if $id is out of the range
       
  1100      */
       
  1101     public function getDocument($id)
       
  1102     {
       
  1103         if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
       
  1104             /* @var $id Zend_Search_Lucene_Search_QueryHit */
       
  1105             $id = $id->id;
       
  1106         }
       
  1107 
       
  1108         if ($id >= $this->_docCount) {
       
  1109             require_once 'Zend/Search/Lucene/Exception.php';
       
  1110             throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
       
  1111         }
       
  1112 
       
  1113         $segmentStartId = 0;
       
  1114         foreach ($this->_segmentInfos as $segmentInfo) {
       
  1115             if ($segmentStartId + $segmentInfo->count() > $id) {
       
  1116                 break;
       
  1117             }
       
  1118 
       
  1119             $segmentStartId += $segmentInfo->count();
       
  1120         }
       
  1121 
       
  1122         $fdxFile = $segmentInfo->openCompoundFile('.fdx');
       
  1123         $fdxFile->seek(($id-$segmentStartId)*8, SEEK_CUR);
       
  1124         $fieldValuesPosition = $fdxFile->readLong();
       
  1125 
       
  1126         $fdtFile = $segmentInfo->openCompoundFile('.fdt');
       
  1127         $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
       
  1128         $fieldCount = $fdtFile->readVInt();
       
  1129 
       
  1130         $doc = new Zend_Search_Lucene_Document();
       
  1131         for ($count = 0; $count < $fieldCount; $count++) {
       
  1132             $fieldNum = $fdtFile->readVInt();
       
  1133             $bits = $fdtFile->readByte();
       
  1134 
       
  1135             $fieldInfo = $segmentInfo->getField($fieldNum);
       
  1136 
       
  1137             if (!($bits & 2)) { // Text data
       
  1138                 $field = new Zend_Search_Lucene_Field($fieldInfo->name,
       
  1139                                                       $fdtFile->readString(),
       
  1140                                                       'UTF-8',
       
  1141                                                       true,
       
  1142                                                       $fieldInfo->isIndexed,
       
  1143                                                       $bits & 1 );
       
  1144             } else {            // Binary data
       
  1145                 $field = new Zend_Search_Lucene_Field($fieldInfo->name,
       
  1146                                                       $fdtFile->readBinary(),
       
  1147                                                       '',
       
  1148                                                       true,
       
  1149                                                       $fieldInfo->isIndexed,
       
  1150                                                       $bits & 1,
       
  1151                                                       true );
       
  1152             }
       
  1153 
       
  1154             $doc->addField($field);
       
  1155         }
       
  1156 
       
  1157         return $doc;
       
  1158     }
       
  1159 
       
  1160 
       
  1161     /**
       
  1162      * Returns true if the index contains documents with the specified term.
       
  1163      *
       
  1164      * Is used for query optimization.
       
  1165      *
       
  1166      * @param Zend_Search_Lucene_Index_Term $term
       
  1167      * @return boolean
       
  1168      */
       
  1169     public function hasTerm(Zend_Search_Lucene_Index_Term $term)
       
  1170     {
       
  1171         foreach ($this->_segmentInfos as $segInfo) {
       
  1172             if ($segInfo->getTermInfo($term) !== null) {
       
  1173                 return true;
       
  1174             }
       
  1175         }
       
  1176 
       
  1177         return false;
       
  1178     }
       
  1179 
       
  1180     /**
       
  1181      * Returns IDs of all documents containing term.
       
  1182      *
       
  1183      * @param Zend_Search_Lucene_Index_Term $term
       
  1184      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
       
  1185      * @return array
       
  1186      */
       
  1187     public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
       
  1188     {
       
  1189         $subResults = array();
       
  1190         $segmentStartDocId = 0;
       
  1191 
       
  1192         foreach ($this->_segmentInfos as $segmentInfo) {
       
  1193             $subResults[] = $segmentInfo->termDocs($term, $segmentStartDocId, $docsFilter);
       
  1194 
       
  1195             $segmentStartDocId += $segmentInfo->count();
       
  1196         }
       
  1197 
       
  1198         if (count($subResults) == 0) {
       
  1199             return array();
       
  1200         } else if (count($subResults) == 1) {
       
  1201             // Index is optimized (only one segment)
       
  1202             // Do not perform array reindexing
       
  1203             return reset($subResults);
       
  1204         } else {
       
  1205             $result = call_user_func_array('array_merge', $subResults);
       
  1206         }
       
  1207 
       
  1208         return $result;
       
  1209     }
       
  1210 
       
  1211     /**
       
  1212      * Returns documents filter for all documents containing term.
       
  1213      *
       
  1214      * It performs the same operation as termDocs, but return result as
       
  1215      * Zend_Search_Lucene_Index_DocsFilter object
       
  1216      *
       
  1217      * @param Zend_Search_Lucene_Index_Term $term
       
  1218      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
       
  1219      * @return Zend_Search_Lucene_Index_DocsFilter
       
  1220      */
       
  1221     public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
       
  1222     {
       
  1223         $segmentStartDocId = 0;
       
  1224         $result = new Zend_Search_Lucene_Index_DocsFilter();
       
  1225 
       
  1226         foreach ($this->_segmentInfos as $segmentInfo) {
       
  1227             $subResults[] = $segmentInfo->termDocs($term, $segmentStartDocId, $docsFilter);
       
  1228 
       
  1229             $segmentStartDocId += $segmentInfo->count();
       
  1230         }
       
  1231 
       
  1232         if (count($subResults) == 0) {
       
  1233             return array();
       
  1234         } else if (count($subResults) == 1) {
       
  1235             // Index is optimized (only one segment)
       
  1236             // Do not perform array reindexing
       
  1237             return reset($subResults);
       
  1238         } else {
       
  1239             $result = call_user_func_array('array_merge', $subResults);
       
  1240         }
       
  1241 
       
  1242         return $result;
       
  1243     }
       
  1244 
       
  1245 
       
  1246     /**
       
  1247      * Returns an array of all term freqs.
       
  1248      * Result array structure: array(docId => freq, ...)
       
  1249      *
       
  1250      * @param Zend_Search_Lucene_Index_Term $term
       
  1251      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
       
  1252      * @return array
       
  1253      */
       
  1254     public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
       
  1255     {
       
  1256         $result = array();
       
  1257         $segmentStartDocId = 0;
       
  1258         foreach ($this->_segmentInfos as $segmentInfo) {
       
  1259             $result += $segmentInfo->termFreqs($term, $segmentStartDocId, $docsFilter);
       
  1260 
       
  1261             $segmentStartDocId += $segmentInfo->count();
       
  1262         }
       
  1263 
       
  1264         return $result;
       
  1265     }
       
  1266 
       
  1267     /**
       
  1268      * Returns an array of all term positions in the documents.
       
  1269      * Result array structure: array(docId => array(pos1, pos2, ...), ...)
       
  1270      *
       
  1271      * @param Zend_Search_Lucene_Index_Term $term
       
  1272      * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
       
  1273      * @return array
       
  1274      */
       
  1275     public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
       
  1276     {
       
  1277         $result = array();
       
  1278         $segmentStartDocId = 0;
       
  1279         foreach ($this->_segmentInfos as $segmentInfo) {
       
  1280             $result += $segmentInfo->termPositions($term, $segmentStartDocId, $docsFilter);
       
  1281 
       
  1282             $segmentStartDocId += $segmentInfo->count();
       
  1283         }
       
  1284 
       
  1285         return $result;
       
  1286     }
       
  1287 
       
  1288 
       
  1289     /**
       
  1290      * Returns the number of documents in this index containing the $term.
       
  1291      *
       
  1292      * @param Zend_Search_Lucene_Index_Term $term
       
  1293      * @return integer
       
  1294      */
       
  1295     public function docFreq(Zend_Search_Lucene_Index_Term $term)
       
  1296     {
       
  1297         $result = 0;
       
  1298         foreach ($this->_segmentInfos as $segInfo) {
       
  1299             $termInfo = $segInfo->getTermInfo($term);
       
  1300             if ($termInfo !== null) {
       
  1301                 $result += $termInfo->docFreq;
       
  1302             }
       
  1303         }
       
  1304 
       
  1305         return $result;
       
  1306     }
       
  1307 
       
  1308 
       
  1309     /**
       
  1310      * Retrieve similarity used by index reader
       
  1311      *
       
  1312      * @return Zend_Search_Lucene_Search_Similarity
       
  1313      */
       
  1314     public function getSimilarity()
       
  1315     {
       
  1316         /** Zend_Search_Lucene_Search_Similarity */
       
  1317         require_once 'Zend/Search/Lucene/Search/Similarity.php';
       
  1318 
       
  1319         return Zend_Search_Lucene_Search_Similarity::getDefault();
       
  1320     }
       
  1321 
       
  1322 
       
  1323     /**
       
  1324      * Returns a normalization factor for "field, document" pair.
       
  1325      *
       
  1326      * @param integer $id
       
  1327      * @param string $fieldName
       
  1328      * @return float
       
  1329      */
       
  1330     public function norm($id, $fieldName)
       
  1331     {
       
  1332         if ($id >= $this->_docCount) {
       
  1333             return null;
       
  1334         }
       
  1335 
       
  1336         $segmentStartId = 0;
       
  1337         foreach ($this->_segmentInfos as $segInfo) {
       
  1338             if ($segmentStartId + $segInfo->count() > $id) {
       
  1339                 break;
       
  1340             }
       
  1341 
       
  1342             $segmentStartId += $segInfo->count();
       
  1343         }
       
  1344 
       
  1345         if ($segInfo->isDeleted($id - $segmentStartId)) {
       
  1346             return 0;
       
  1347         }
       
  1348 
       
  1349         return $segInfo->norm($id - $segmentStartId, $fieldName);
       
  1350     }
       
  1351 
       
  1352     /**
       
  1353      * Returns true if any documents have been deleted from this index.
       
  1354      *
       
  1355      * @return boolean
       
  1356      */
       
  1357     public function hasDeletions()
       
  1358     {
       
  1359         foreach ($this->_segmentInfos as $segmentInfo) {
       
  1360             if ($segmentInfo->hasDeletions()) {
       
  1361                 return true;
       
  1362             }
       
  1363         }
       
  1364 
       
  1365         return false;
       
  1366     }
       
  1367 
       
  1368 
       
  /**
   * Deletes a document from the index.
   *
   * $id is an internal document id. A query hit may be passed instead, in
   * which case its "id" property is used.
   *
   * The id is mapped to the segment holding it by walking the segment list and
   * accumulating segment sizes; the document is then deleted from that segment
   * using its segment-local offset.
   *
   * @param integer|Zend_Search_Lucene_Search_QueryHit $id
   * @throws Zend_Search_Lucene_Exception if $id is negative, is not less than
   *         the document count, or is not covered by any segment
   */
  public function delete($id)
  {
      if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
          /* @var $id Zend_Search_Lucene_Search_QueryHit */
          $id = $id->id;
      }

      // A negative id must be rejected here: it would otherwise match the
      // first segment and be handed over as a negative offset.
      if ($id < 0 || $id >= $this->_docCount) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
      }

      $segmentStartId = 0;
      $targetSegment  = null;
      foreach ($this->_segmentInfos as $segmentInfo) {
          $segmentSize = $segmentInfo->count();

          if ($segmentStartId + $segmentSize > $id) {
              $targetSegment = $segmentInfo;
              break;
          }

          $segmentStartId += $segmentSize;
      }

      // addDocument() increments the document counter immediately, so the
      // counter may run ahead of the sum of segment sizes. Without this check
      // an id beyond the last segment would be applied, as a wrapped-around
      // offset, to the last segment and delete an unrelated document.
      if ($targetSegment === null) {
          require_once 'Zend/Search/Lucene/Exception.php';
          throw new Zend_Search_Lucene_Exception('Document id does not belong to any index segment.');
      }

      $targetSegment->delete($id - $segmentStartId);

      $this->_hasChanges = true;
  }
       
  1400 
       
  1401 
       
  1402 
       
  /**
   * Adds a document to this index.
   *
   * The document is handed to the index writer; afterwards the cached
   * document counter is incremented and the index is flagged as changed.
   *
   * @param Zend_Search_Lucene_Document $document
   */
  public function addDocument(Zend_Search_Lucene_Document $document)
  {
      $writer = $this->_getIndexWriter();
      $writer->addDocument($document);

      ++$this->_docCount;
      $this->_hasChanges = true;
  }
       
  1415 
       
  1416 
       
  /**
   * Recalculates the cached document counter.
   *
   * The counter is reset and then rebuilt as the sum of count() over all segments.
   */
  private function _updateDocCount()
  {
      $this->_docCount = 0;

      foreach ($this->_segmentInfos as $segment) {
          $this->_docCount = $this->_docCount + $segment->count();
      }
  }
       
  1427 
       
  /**
   * Commits pending index changes.
   *
   * Does nothing unless the index is flagged as changed. Otherwise the index
   * writer is asked to commit, the document counter is recalculated and the
   * change flag is cleared.
   *
   * @todo undeleteAll processing.
   */
  public function commit()
  {
      if (!$this->_hasChanges) {
          return;
      }

      $writer = $this->_getIndexWriter();
      $writer->commit();

      $this->_updateDocCount();
      $this->_hasChanges = false;
  }
       
  1443 
       
  1444 
       
  /**
   * Optimize index.
   *
   * Merges all segments into one. Pending changes are committed first; the
   * writer is only invoked when there is more than one segment or when some
   * segment reports deletions.
   */
  public function optimize()
  {
      // Flush whatever is pending before looking at the segment list
      $this->commit();

      $segmentTotal = count($this->_segmentInfos);
      if ($segmentTotal <= 1 && !$this->hasDeletions()) {
          // Nothing to merge and nothing to purge
          return;
      }

      $this->_getIndexWriter()->optimize();
      $this->_updateDocCount();
  }
       
  1460 
       
  1461 
       
  /**
   * Returns an array of all terms in this index.
   *
   * Every segment's terms stream is reset and the non-empty segments are put
   * into a priority queue. Segments are then popped one at a time; a term is
   * collected only when the next queued segment is positioned on a different
   * term key (or the queue is empty), so a term shared by several segments
   * is reported once.
   *
   * @return array
   */
  public function terms()
  {
      /** Zend_Search_Lucene_Index_TermsPriorityQueue */
      require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

      $queue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

      foreach ($this->_segmentInfos as $segment) {
          $segment->resetTermsStream();

          // Segments without any term never enter the queue
          if ($segment->currentTerm() === null) {
              continue;
          }

          $queue->put($segment);
      }

      $collected = array();

      for ($segment = $queue->pop(); $segment !== null; $segment = $queue->pop()) {
          $following = $queue->top();

          $isNewTerm = ($following === null)
                    || ($following->currentTerm()->key() != $segment->currentTerm()->key());

          if ($isNewTerm) {
              $collected[] = $segment->currentTerm();
          }

          // A segment that still has terms goes back into the queue
          if ($segment->nextTerm() !== null) {
              $queue->put($segment);
          }
      }

      return $collected;
  }
       
  1501 
       
  1502 
       
  1503     /**
       
  1504      * Terms stream priority queue object
       
  1505      *
       
  1506      * @var Zend_Search_Lucene_TermStreamsPriorityQueue
       
  1507      */
       
  1508     private $_termsStream = null;
       
  1509 
       
  /**
   * Reset terms stream.
   *
   * An already existing stream object is reset in place; otherwise a new
   * stream is created over the current segment list.
   */
  public function resetTermsStream()
  {
      if ($this->_termsStream !== null) {
          $this->_termsStream->resetTermsStream();
          return;
      }

      /** Zend_Search_Lucene_TermStreamsPriorityQueue */
      require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';

      $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_segmentInfos);
  }
       
  1524 
       
  /**
   * Skip terms stream up to the specified term prefix.
   *
   * Prefix contains fully specified field info and portion of searched term.
   * The call is forwarded to the current terms stream object.
   *
   * @param Zend_Search_Lucene_Index_Term $prefix
   */
  public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
  {
      $stream = $this->_termsStream;

      $stream->skipTo($prefix);
  }
       
  1536 
       
  /**
   * Scans terms dictionary and returns next term.
   *
   * The result is whatever the current terms stream object returns.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function nextTerm()
  {
      $stream = $this->_termsStream;

      return $stream->nextTerm();
  }
       
  1546 
       
  /**
   * Returns term in current position.
   *
   * The result is whatever the current terms stream object returns.
   *
   * @return Zend_Search_Lucene_Index_Term|null
   */
  public function currentTerm()
  {
      $stream = $this->_termsStream;

      return $stream->currentTerm();
  }
       
  1556 
       
  /**
   * Close terms stream
   *
   * Should be used for resources clean up if stream is not read up to the end.
   *
   * Safe to call when no stream is open (never reset, or already closed):
   * in that case the method does nothing.
   */
  public function closeTermsStream()
  {
      // Cleanup must not fail: dereferencing a missing stream would be a
      // fatal error, so a repeated or premature close is simply ignored.
      if ($this->_termsStream === null) {
          return;
      }

      $this->_termsStream->closeTermsStream();
      $this->_termsStream = null;
  }
       
  1567 
       
  1568 
       
  1569     /*************************************************************************
       
  1570     @todo UNIMPLEMENTED
       
  1571     *************************************************************************/
       
  /**
   * Undeletes all documents currently marked as deleted in this index.
   *
   * Placeholder only: the method body is empty and calling it has no effect.
   *
   * @todo Implementation
   */
  public function undeleteAll()
  {
      // Intentionally empty until the feature is implemented.
  }
       
  1579 }