web/lib/Zend/Search/Lucene/Index/Writer.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Search_Lucene
       
    17  * @subpackage Index
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: Writer.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 
       
    24 /** Zend_Search_Lucene_LockManager */
       
    25 require_once 'Zend/Search/Lucene/LockManager.php';
       
    26 
       
    27 
       
    28 /**
       
    29  * @category   Zend
       
    30  * @package    Zend_Search_Lucene
       
    31  * @subpackage Index
       
    32  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    33  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    34  */
       
    35 class Zend_Search_Lucene_Index_Writer
       
    36 {
       
    37     /**
       
    38      * @todo Implement Analyzer substitution
       
    39      * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
       
    40      *       temporary index files
       
    41      * @todo Directory lock processing
       
    42      */
       
    43 
       
    44     /**
       
    45      * Number of documents required before the buffered in-memory
       
    46      * documents are written into a new Segment
       
    47      *
       
    48      * Default value is 10
       
    49      *
       
    50      * @var integer
       
    51      */
       
    52     public $maxBufferedDocs = 10;
       
    53 
       
    54     /**
       
    55      * Largest number of documents ever merged by addDocument().
       
    56      * Small values (e.g., less than 10,000) are best for interactive indexing,
       
    57      * as this limits the length of pauses while indexing to a few seconds.
       
    58      * Larger values are best for batched indexing and speedier searches.
       
    59      *
       
    60      * Default value is PHP_INT_MAX
       
    61      *
       
    62      * @var integer
       
    63      */
       
    64     public $maxMergeDocs = PHP_INT_MAX;
       
    65 
       
    66     /**
       
    67      * Determines how often segment indices are merged by addDocument().
       
    68      *
       
    69      * With smaller values, less RAM is used while indexing,
       
    70      * and searches on unoptimized indices are faster,
       
    71      * but indexing speed is slower.
       
    72      *
       
    73      * With larger values, more RAM is used during indexing,
       
    74      * and while searches on unoptimized indices are slower,
       
    75      * indexing is faster.
       
    76      *
       
    77      * Thus larger values (> 10) are best for batch index creation,
       
    78      * and smaller values (< 10) for indices that are interactively maintained.
       
    79      *
       
    80      * Default value is 10
       
    81      *
       
    82      * @var integer
       
    83      */
       
    84     public $mergeFactor = 10;
       
    85 
       
    86     /**
       
    87      * File system adapter.
       
    88      *
       
    89      * @var Zend_Search_Lucene_Storage_Directory
       
    90      */
       
    91     private $_directory = null;
       
    92 
       
    93 
       
    94     /**
       
    95      * Changes counter.
       
    96      *
       
    97      * @var integer
       
    98      */
       
    99     private $_versionUpdate = 0;
       
   100 
       
   101     /**
       
   102      * List of the segments, created by index writer
       
   103      * Array of Zend_Search_Lucene_Index_SegmentInfo objects
       
   104      *
       
   105      * @var array
       
   106      */
       
   107     private $_newSegments = array();
       
   108 
       
   109     /**
       
   110      * List of segments to be deleted on commit
       
   111      *
       
   112      * @var array
       
   113      */
       
   114     private $_segmentsToDelete = array();
       
   115 
       
   116     /**
       
   117      * Current segment to add documents
       
   118      *
       
   119      * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
       
   120      */
       
   121     private $_currentSegment = null;
       
   122 
       
   123     /**
       
   124      * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
       
   125      *
       
   126      * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
       
   127      *
       
   128      * @var array Zend_Search_Lucene_Index_SegmentInfo
       
   129      */
       
   130     private $_segmentInfos;
       
   131 
       
   132     /**
       
   133      * Index target format version
       
   134      *
       
   135      * @var integer
       
   136      */
       
   137     private $_targetFormatVersion;
       
   138 
       
   139     /**
       
    140      * List of index file extensions
       
   141      *
       
   142      * @var array
       
   143      */
       
   144     private static $_indexExtensions = array('.cfs' => '.cfs',
       
   145                                              '.cfx' => '.cfx',
       
   146                                              '.fnm' => '.fnm',
       
   147                                              '.fdx' => '.fdx',
       
   148                                              '.fdt' => '.fdt',
       
   149                                              '.tis' => '.tis',
       
   150                                              '.tii' => '.tii',
       
   151                                              '.frq' => '.frq',
       
   152                                              '.prx' => '.prx',
       
   153                                              '.tvx' => '.tvx',
       
   154                                              '.tvd' => '.tvd',
       
   155                                              '.tvf' => '.tvf',
       
   156                                              '.del' => '.del',
       
   157                                              '.sti' => '.sti' );
       
   158 
       
   159 
       
   160     /**
       
   161      * Create empty index
       
   162      *
       
   163      * @param Zend_Search_Lucene_Storage_Directory $directory
       
   164      * @param integer $generation
       
   165      * @param integer $nameCount
       
   166      */
       
   167     public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
       
   168     {
       
   169         if ($generation == 0) {
       
   170             // Create index in pre-2.1 mode
       
   171             foreach ($directory->fileList() as $file) {
       
   172                 if ($file == 'deletable' ||
       
   173                     $file == 'segments'  ||
       
   174                     isset(self::$_indexExtensions[ substr($file, strlen($file)-4)]) ||
       
   175                     preg_match('/\.f\d+$/i', $file) /* matches <segment_name>.f<decimal_nmber> file names */) {
       
   176                         $directory->deleteFile($file);
       
   177                     }
       
   178             }
       
   179 
       
   180             $segmentsFile = $directory->createFile('segments');
       
   181             $segmentsFile->writeInt((int)0xFFFFFFFF);
       
   182 
       
   183             // write version (initialized by current time)
       
   184             $segmentsFile->writeLong(round(microtime(true)));
       
   185 
       
   186             // write name counter
       
   187             $segmentsFile->writeInt($nameCount);
       
   188             // write segment counter
       
   189             $segmentsFile->writeInt(0);
       
   190 
       
   191             $deletableFile = $directory->createFile('deletable');
       
   192             // write counter
       
   193             $deletableFile->writeInt(0);
       
   194         } else {
       
   195             $genFile = $directory->createFile('segments.gen');
       
   196 
       
   197             $genFile->writeInt((int)0xFFFFFFFE);
       
   198             // Write generation two times
       
   199             $genFile->writeLong($generation);
       
   200             $genFile->writeLong($generation);
       
   201 
       
   202             $segmentsFile = $directory->createFile(Zend_Search_Lucene::getSegmentFileName($generation));
       
   203             $segmentsFile->writeInt((int)0xFFFFFFFD);
       
   204 
       
   205             // write version (initialized by current time)
       
   206             $segmentsFile->writeLong(round(microtime(true)));
       
   207 
       
   208             // write name counter
       
   209             $segmentsFile->writeInt($nameCount);
       
   210             // write segment counter
       
   211             $segmentsFile->writeInt(0);
       
   212         }
       
   213     }
       
   214 
       
   215     /**
       
   216      * Open the index for writing
       
   217      *
       
   218      * @param Zend_Search_Lucene_Storage_Directory $directory
       
   219      * @param array $segmentInfos
       
   220      * @param integer $targetFormatVersion
       
    221      *
       
   222      */
       
   223     public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
       
   224     {
       
   225         $this->_directory           = $directory;
       
   226         $this->_segmentInfos        = &$segmentInfos;
       
   227         $this->_targetFormatVersion = $targetFormatVersion;
       
   228     }
       
   229 
       
   230     /**
       
   231      * Adds a document to this index.
       
   232      *
       
   233      * @param Zend_Search_Lucene_Document $document
       
   234      */
       
   235     public function addDocument(Zend_Search_Lucene_Document $document)
       
   236     {
       
   237         /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
       
   238         require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';
       
   239 
       
   240         if ($this->_currentSegment === null) {
       
   241             $this->_currentSegment =
       
   242                 new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $this->_newSegmentName());
       
   243         }
       
   244         $this->_currentSegment->addDocument($document);
       
   245 
       
   246         if ($this->_currentSegment->count() >= $this->maxBufferedDocs) {
       
   247             $this->commit();
       
   248         }
       
   249 
       
   250         $this->_maybeMergeSegments();
       
   251 
       
   252         $this->_versionUpdate++;
       
   253     }
       
   254 
       
   255 
       
   256     /**
       
   257      * Check if we have anything to merge
       
   258      *
       
   259      * @return boolean
       
   260      */
       
   261     private function _hasAnythingToMerge()
       
   262     {
       
   263         $segmentSizes = array();
       
   264         foreach ($this->_segmentInfos as $segName => $segmentInfo) {
       
   265             $segmentSizes[$segName] = $segmentInfo->count();
       
   266         }
       
   267 
       
   268         $mergePool   = array();
       
   269         $poolSize    = 0;
       
   270         $sizeToMerge = $this->maxBufferedDocs;
       
   271         asort($segmentSizes, SORT_NUMERIC);
       
   272         foreach ($segmentSizes as $segName => $size) {
       
   273             // Check, if segment comes into a new merging block
       
   274             while ($size >= $sizeToMerge) {
       
   275                 // Merge previous block if it's large enough
       
   276                 if ($poolSize >= $sizeToMerge) {
       
   277                     return true;
       
   278                 }
       
   279                 $mergePool   = array();
       
   280                 $poolSize    = 0;
       
   281 
       
   282                 $sizeToMerge *= $this->mergeFactor;
       
   283 
       
   284                 if ($sizeToMerge > $this->maxMergeDocs) {
       
   285                     return false;
       
   286                 }
       
   287             }
       
   288 
       
   289             $mergePool[] = $this->_segmentInfos[$segName];
       
   290             $poolSize += $size;
       
   291         }
       
   292 
       
   293         if ($poolSize >= $sizeToMerge) {
       
   294             return true;
       
   295         }
       
   296 
       
   297         return false;
       
   298     }
       
   299 
       
   300     /**
       
   301      * Merge segments if necessary
       
   302      */
       
   303     private function _maybeMergeSegments()
       
   304     {
       
   305         if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
       
   306             return;
       
   307         }
       
   308 
       
   309         if (!$this->_hasAnythingToMerge()) {
       
   310             Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
       
   311             return;
       
   312         }
       
   313 
       
   314         // Update segments list to be sure all segments are not merged yet by another process
       
   315         //
       
   316         // Segment merging functionality is concentrated in this class and surrounded
       
   317         // by optimization lock obtaining/releasing.
       
   318         // _updateSegments() refreshes segments list from the latest index generation.
       
   319         // So only new segments can be added to the index while we are merging some already existing
       
   320         // segments.
       
   321         // Newly added segments will be also included into the index by the _updateSegments() call
       
   322         // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
       
   323         // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
       
   324         $this->_updateSegments();
       
   325 
       
   326         // Perform standard auto-optimization procedure
       
   327         $segmentSizes = array();
       
   328         foreach ($this->_segmentInfos as $segName => $segmentInfo) {
       
   329             $segmentSizes[$segName] = $segmentInfo->count();
       
   330         }
       
   331 
       
   332         $mergePool   = array();
       
   333         $poolSize    = 0;
       
   334         $sizeToMerge = $this->maxBufferedDocs;
       
   335         asort($segmentSizes, SORT_NUMERIC);
       
   336         foreach ($segmentSizes as $segName => $size) {
       
   337             // Check, if segment comes into a new merging block
       
   338             while ($size >= $sizeToMerge) {
       
   339                 // Merge previous block if it's large enough
       
   340                 if ($poolSize >= $sizeToMerge) {
       
   341                     $this->_mergeSegments($mergePool);
       
   342                 }
       
   343                 $mergePool   = array();
       
   344                 $poolSize    = 0;
       
   345 
       
   346                 $sizeToMerge *= $this->mergeFactor;
       
   347 
       
   348                 if ($sizeToMerge > $this->maxMergeDocs) {
       
   349                     Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
       
   350                     return;
       
   351                 }
       
   352             }
       
   353 
       
   354             $mergePool[] = $this->_segmentInfos[$segName];
       
   355             $poolSize += $size;
       
   356         }
       
   357 
       
   358         if ($poolSize >= $sizeToMerge) {
       
   359             $this->_mergeSegments($mergePool);
       
   360         }
       
   361 
       
   362         Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
       
   363     }
       
   364 
       
   365     /**
       
   366      * Merge specified segments
       
   367      *
       
   368      * $segments is an array of SegmentInfo objects
       
   369      *
       
   370      * @param array $segments
       
   371      */
       
   372     private function _mergeSegments($segments)
       
   373     {
       
   374         $newName = $this->_newSegmentName();
       
   375 
       
   376         /** Zend_Search_Lucene_Index_SegmentMerger */
       
   377         require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
       
   378         $merger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory,
       
   379                                                              $newName);
       
   380         foreach ($segments as $segmentInfo) {
       
   381             $merger->addSource($segmentInfo);
       
   382             $this->_segmentsToDelete[$segmentInfo->getName()] = $segmentInfo->getName();
       
   383         }
       
   384 
       
   385         $newSegment = $merger->merge();
       
   386         if ($newSegment !== null) {
       
   387             $this->_newSegments[$newSegment->getName()] = $newSegment;
       
   388         }
       
   389 
       
   390         $this->commit();
       
   391     }
       
   392 
       
   393     /**
       
   394      * Update segments file by adding current segment to a list
       
   395      *
       
   396      * @throws Zend_Search_Lucene_Exception
       
   397      */
       
   398     private function _updateSegments()
       
   399     {
       
   400         // Get an exclusive index lock
       
   401         Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
       
   402 
       
   403         // Write down changes for the segments
       
   404         foreach ($this->_segmentInfos as $segInfo) {
       
   405             $segInfo->writeChanges();
       
   406         }
       
   407 
       
   408 
       
   409         $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
       
   410         $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
       
   411         $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);
       
   412 
       
   413         try {
       
   414             $genFile = $this->_directory->getFileObject('segments.gen', false);
       
   415         } catch (Zend_Search_Lucene_Exception $e) {
       
   416             if (strpos($e->getMessage(), 'is not readable') !== false) {
       
   417                 $genFile = $this->_directory->createFile('segments.gen');
       
   418             } else {
       
   419                 throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
   420             }
       
   421         }
       
   422 
       
   423         $genFile->writeInt((int)0xFFFFFFFE);
       
   424         // Write generation (first copy)
       
   425         $genFile->writeLong($generation);
       
   426 
       
   427         try {
       
   428             // Write format marker
       
   429             if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
       
   430                 $newSegmentFile->writeInt((int)0xFFFFFFFD);
       
   431             } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
       
   432                 $newSegmentFile->writeInt((int)0xFFFFFFFC);
       
   433             }
       
   434 
       
   435             // Read src file format identifier
       
   436             $format = $segmentsFile->readInt();
       
   437             if ($format == (int)0xFFFFFFFF) {
       
   438                 $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
       
   439             } else if ($format == (int)0xFFFFFFFD) {
       
   440                 $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
       
   441             } else if ($format == (int)0xFFFFFFFC) {
       
   442                 $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
       
   443             } else {
       
   444                 throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
       
   445             }
       
   446 
       
   447             $version = $segmentsFile->readLong() + $this->_versionUpdate;
       
   448             $this->_versionUpdate = 0;
       
   449             $newSegmentFile->writeLong($version);
       
   450 
       
   451             // Write segment name counter
       
   452             $newSegmentFile->writeInt($segmentsFile->readInt());
       
   453 
       
   454             // Get number of segments offset
       
   455             $numOfSegmentsOffset = $newSegmentFile->tell();
       
   456             // Write dummy data (segment counter)
       
   457             $newSegmentFile->writeInt(0);
       
   458 
       
    459             // Read number of segments
       
   460             $segmentsCount = $segmentsFile->readInt();
       
   461 
       
   462             $segments = array();
       
   463             for ($count = 0; $count < $segmentsCount; $count++) {
       
   464                 $segName = $segmentsFile->readString();
       
   465                 $segSize = $segmentsFile->readInt();
       
   466 
       
   467                 if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
       
   468                     // pre-2.1 index format
       
   469                     $delGen            = 0;
       
   470                     $hasSingleNormFile = false;
       
   471                     $numField          = (int)0xFFFFFFFF;
       
   472                     $isCompoundByte    = 0;
       
   473                     $docStoreOptions   = null;
       
   474                 } else {
       
   475                     $delGen = $segmentsFile->readLong();
       
   476 
       
   477                     if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
       
   478                         $docStoreOffset = $segmentsFile->readInt();
       
   479 
       
   480                         if ($docStoreOffset != (int)0xFFFFFFFF) {
       
   481                             $docStoreSegment        = $segmentsFile->readString();
       
   482                             $docStoreIsCompoundFile = $segmentsFile->readByte();
       
   483 
       
   484                             $docStoreOptions = array('offset'     => $docStoreOffset,
       
   485                                                      'segment'    => $docStoreSegment,
       
   486                                                      'isCompound' => ($docStoreIsCompoundFile == 1));
       
   487                         } else {
       
   488                             $docStoreOptions = null;
       
   489                         }
       
   490                     } else {
       
   491                         $docStoreOptions = null;
       
   492                     }
       
   493 
       
   494                     $hasSingleNormFile = $segmentsFile->readByte();
       
   495                     $numField          = $segmentsFile->readInt();
       
   496 
       
   497                     $normGens = array();
       
   498                     if ($numField != (int)0xFFFFFFFF) {
       
   499                         for ($count1 = 0; $count1 < $numField; $count1++) {
       
   500                             $normGens[] = $segmentsFile->readLong();
       
   501                         }
       
   502                     }
       
   503                     $isCompoundByte    = $segmentsFile->readByte();
       
   504                 }
       
   505 
       
   506                 if (!in_array($segName, $this->_segmentsToDelete)) {
       
   507                     // Load segment if necessary
       
   508                     if (!isset($this->_segmentInfos[$segName])) {
       
   509                         if ($isCompoundByte == 0xFF) {
       
   510                             // The segment is not a compound file
       
   511                             $isCompound = false;
       
   512                         } else if ($isCompoundByte == 0x00) {
       
   513                             // The status is unknown
       
   514                             $isCompound = null;
       
   515                         } else if ($isCompoundByte == 0x01) {
       
   516                             // The segment is a compound file
       
   517                             $isCompound = true;
       
   518                         }
       
   519 
       
   520                         /** Zend_Search_Lucene_Index_SegmentInfo */
       
   521                         require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
       
   522                         $this->_segmentInfos[$segName] =
       
   523                                     new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
       
   524                                                                              $segName,
       
   525                                                                              $segSize,
       
   526                                                                              $delGen,
       
   527                                                                              $docStoreOptions,
       
   528                                                                              $hasSingleNormFile,
       
   529                                                                              $isCompound);
       
   530                     } else {
       
   531                         // Retrieve actual deletions file generation number
       
   532                         $delGen = $this->_segmentInfos[$segName]->getDelGen();
       
   533                     }
       
   534 
       
   535                     $newSegmentFile->writeString($segName);
       
   536                     $newSegmentFile->writeInt($segSize);
       
   537                     $newSegmentFile->writeLong($delGen);
       
   538                     if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
       
   539                         if ($docStoreOptions !== null) {
       
   540                             $newSegmentFile->writeInt($docStoreOffset);
       
   541                             $newSegmentFile->writeString($docStoreSegment);
       
   542                             $newSegmentFile->writeByte($docStoreIsCompoundFile);
       
   543                         } else {
       
   544                             // Set DocStoreOffset to -1
       
   545                             $newSegmentFile->writeInt((int)0xFFFFFFFF);
       
   546                         }
       
   547                     } else if ($docStoreOptions !== null) {
       
   548                         // Release index write lock
       
   549                         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
       
   550 
       
   551                         throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
       
   552                     }
       
   553 
       
   554                     $newSegmentFile->writeByte($hasSingleNormFile);
       
   555                     $newSegmentFile->writeInt($numField);
       
   556                     if ($numField != (int)0xFFFFFFFF) {
       
   557                         foreach ($normGens as $normGen) {
       
   558                             $newSegmentFile->writeLong($normGen);
       
   559                         }
       
   560                     }
       
   561                     $newSegmentFile->writeByte($isCompoundByte);
       
   562 
       
   563                     $segments[$segName] = $segSize;
       
   564                 }
       
   565             }
       
   566             $segmentsFile->close();
       
   567 
       
   568             $segmentsCount = count($segments) + count($this->_newSegments);
       
   569 
       
   570             foreach ($this->_newSegments as $segName => $segmentInfo) {
       
   571                 $newSegmentFile->writeString($segName);
       
   572                 $newSegmentFile->writeInt($segmentInfo->count());
       
   573 
       
   574                 // delete file generation: -1 (there is no delete file yet)
       
   575                 $newSegmentFile->writeInt((int)0xFFFFFFFF);$newSegmentFile->writeInt((int)0xFFFFFFFF);
       
   576                 if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
       
   577                     // docStoreOffset: -1 (segment doesn't use shared doc store)
       
   578                     $newSegmentFile->writeInt((int)0xFFFFFFFF);
       
   579                 }
       
   580                 // HasSingleNormFile
       
   581                 $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
       
   582                 // NumField
       
   583                 $newSegmentFile->writeInt((int)0xFFFFFFFF);
       
   584                 // IsCompoundFile
       
   585                 $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);
       
   586 
       
   587                 $segments[$segmentInfo->getName()] = $segmentInfo->count();
       
   588                 $this->_segmentInfos[$segName] = $segmentInfo;
       
   589             }
       
   590             $this->_newSegments = array();
       
   591 
       
   592             $newSegmentFile->seek($numOfSegmentsOffset);
       
   593             $newSegmentFile->writeInt($segmentsCount);  // Update segments count
       
   594             $newSegmentFile->close();
       
   595         } catch (Exception $e) {
       
   596             /** Restore previous index generation */
       
   597             $generation--;
       
   598             $genFile->seek(4, SEEK_SET);
       
   599             // Write generation number twice
       
   600             $genFile->writeLong($generation); $genFile->writeLong($generation);
       
   601 
       
   602             // Release index write lock
       
   603             Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
       
   604 
       
   605             // Throw the exception
       
   606             require_once 'Zend/Search/Lucene/Exception.php';
       
   607             throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
   608         }
       
   609 
       
   610         // Write generation (second copy)
       
   611         $genFile->writeLong($generation);
       
   612 
       
   613 
       
   614         // Check if another update or read process is not running now
       
   615         // If yes, skip clean-up procedure
       
   616         if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
       
   617             /**
       
   618              * Clean-up directory
       
   619              */
       
   620             $filesToDelete = array();
       
   621             $filesTypes    = array();
       
   622             $filesNumbers  = array();
       
   623 
       
   624             // list of .del files of currently used segments
       
   625             // each segment can have several generations of .del files
       
   626             // only last should not be deleted
       
   627             $delFiles = array();
       
   628 
       
   629             foreach ($this->_directory->fileList() as $file) {
       
   630                 if ($file == 'deletable') {
       
   631                     // 'deletable' file
       
   632                     $filesToDelete[] = $file;
       
   633                     $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
       
   634                     $filesNumbers[]  = 0;
       
   635                 } else if ($file == 'segments') {
       
   636                     // 'segments' file
       
   637                     $filesToDelete[] = $file;
       
   638                     $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
       
   639                     $filesNumbers[]  = 0;
       
   640                 } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
       
   641                     // 'segments_xxx' file
       
   642                     // Check if it's not a just created generation file
       
   643                     if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
       
   644                         $filesToDelete[] = $file;
       
   645                         $filesTypes[]    = 2; // first group of files for deletions
       
   646                         $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
       
   647                     }
       
   648                 } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
       
   649                     // one of per segment files ('<segment_name>.f<decimal_number>')
       
   650                     // Check if it's not one of the segments in the current segments set
       
   651                     if (!isset($segments[$matches[1]])) {
       
   652                         $filesToDelete[] = $file;
       
   653                         $filesTypes[]    = 3; // second group of files for deletions
       
   654                         $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
       
   655                     }
       
   656                 } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
       
   657                     // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
       
   658                     // Check if it's not one of the segments in the current segments set
       
   659                     if (!isset($segments[$matches[1]])) {
       
   660                         $filesToDelete[] = $file;
       
   661                         $filesTypes[]    = 3; // second group of files for deletions
       
   662                         $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
       
   663                     } else {
       
   664                         $segmentNumber = (int)base_convert($matches[2], 36, 10);
       
   665                         $delGeneration = (int)base_convert($matches[4], 36, 10);
       
   666                         if (!isset($delFiles[$segmentNumber])) {
       
   667                             $delFiles[$segmentNumber] = array();
       
   668                         }
       
   669                         $delFiles[$segmentNumber][$delGeneration] = $file;
       
   670                     }
       
   671                 } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
       
   672                     // one of per segment files ('<segment_name>.<ext>')
       
   673                     $segmentName = substr($file, 0, strlen($file) - 4);
       
   674                     // Check if it's not one of the segments in the current segments set
       
   675                     if (!isset($segments[$segmentName])  &&
       
   676                         ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
       
   677                         $filesToDelete[] = $file;
       
   678                         $filesTypes[]    = 3; // second group of files for deletions
       
   679                         $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
       
   680                     }
       
   681                 }
       
   682             }
       
   683 
       
   684             $maxGenNumber = 0;
       
   685             // process .del files of currently used segments
       
   686             foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
       
   687                 ksort($delFiles[$segmentNumber], SORT_NUMERIC);
       
   688                 array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting
       
   689 
       
   690                 end($delFiles[$segmentNumber]);
       
   691                 $lastGenNumber = key($delFiles[$segmentNumber]);
       
   692                 if ($lastGenNumber > $maxGenNumber) {
       
   693                     $maxGenNumber = $lastGenNumber;
       
   694                 }
       
   695             }
       
   696             foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
       
   697                 foreach ($segmentDelFiles as $delGeneration => $file) {
       
   698                         $filesToDelete[] = $file;
       
   699                         $filesTypes[]    = 4; // third group of files for deletions
       
   700                         $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
       
   701                 }
       
   702             }
       
   703 
       
   704             // Reorder files for deleting
       
   705             array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
       
   706                             $filesNumbers,  SORT_ASC, SORT_NUMERIC,
       
   707                             $filesToDelete, SORT_ASC, SORT_STRING);
       
   708 
       
   709             foreach ($filesToDelete as $file) {
       
   710                 try {
       
   711                     /** Skip shared docstore segments deleting */
       
   712                     /** @todo Process '.cfx' files to check if them are already unused */
       
   713                     if (substr($file, strlen($file)-4) != '.cfx') {
       
   714                         $this->_directory->deleteFile($file);
       
   715                     }
       
   716                 } catch (Zend_Search_Lucene_Exception $e) {
       
   717                     if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
       
   718                         // That's not "file is under processing or already deleted" exception
       
   719                         // Pass it through
       
   720                         throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
       
   721                     }
       
   722                 }
       
   723             }
       
   724 
       
   725             // Return read lock into the previous state
       
   726             Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
       
   727         } else {
       
   728             // Only release resources if another index reader is running now
       
   729             foreach ($this->_segmentsToDelete as $segName) {
       
   730                 foreach (self::$_indexExtensions as $ext) {
       
   731                     $this->_directory->purgeFile($segName . $ext);
       
   732                 }
       
   733             }
       
   734         }
       
   735 
       
   736         // Clean-up _segmentsToDelete container
       
   737         $this->_segmentsToDelete = array();
       
   738 
       
   739 
       
   740         // Release index write lock
       
   741         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
       
   742 
       
   743         // Remove unused segments from segments list
       
   744         foreach ($this->_segmentInfos as $segName => $segmentInfo) {
       
   745             if (!isset($segments[$segName])) {
       
   746                 unset($this->_segmentInfos[$segName]);
       
   747             }
       
   748         }
       
   749     }
       
   750 
       
   751     /**
       
   752      * Commit current changes
       
   753      */
       
   754     public function commit()
       
   755     {
       
   756         if ($this->_currentSegment !== null) {
       
   757             $newSegment = $this->_currentSegment->close();
       
   758             if ($newSegment !== null) {
       
   759                 $this->_newSegments[$newSegment->getName()] = $newSegment;
       
   760             }
       
   761             $this->_currentSegment = null;
       
   762         }
       
   763 
       
   764         $this->_updateSegments();
       
   765     }
       
   766 
       
   767 
       
   768     /**
       
   769      * Merges the provided indexes into this index.
       
   770      *
       
   771      * @param array $readers
       
   772      * @return void
       
   773      */
       
   774     public function addIndexes($readers)
       
   775     {
       
   776         /**
       
   777          * @todo implementation
       
   778          */
       
   779     }
       
   780 
       
   781     /**
       
   782      * Merges all segments together into new one
       
   783      *
       
   784      * Returns true on success and false if another optimization or auto-optimization process
       
   785      * is running now
       
   786      *
       
   787      * @return boolean
       
   788      */
       
   789     public function optimize()
       
   790     {
       
   791         if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
       
   792             return false;
       
   793         }
       
   794 
       
   795         // Update segments list to be sure all segments are not merged yet by another process
       
   796         //
       
   797         // Segment merging functionality is concentrated in this class and surrounded
       
   798         // by optimization lock obtaining/releasing.
       
   799         // _updateSegments() refreshes segments list from the latest index generation.
       
   800         // So only new segments can be added to the index while we are merging some already existing
       
   801         // segments.
       
   802         // Newly added segments will be also included into the index by the _updateSegments() call
       
   803         // either by another process or by the current process with the commit() call at the end of _mergeSegments() method.
       
   804         // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
       
   805         $this->_updateSegments();
       
   806 
       
   807         $this->_mergeSegments($this->_segmentInfos);
       
   808 
       
   809         Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
       
   810 
       
   811         return true;
       
   812     }
       
   813 
       
   814     /**
       
   815      * Get name for new segment
       
   816      *
       
   817      * @return string
       
   818      */
       
   819     private function _newSegmentName()
       
   820     {
       
   821         Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
       
   822 
       
   823         $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
       
   824         $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
       
   825 
       
   826         $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
       
   827         $segmentNameCounter = $segmentsFile->readInt();
       
   828 
       
   829         $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
       
   830         $segmentsFile->writeInt($segmentNameCounter + 1);
       
   831 
       
   832         // Flash output to guarantee that wrong value will not be loaded between unlock and
       
   833         // return (which calls $segmentsFile destructor)
       
   834         $segmentsFile->flush();
       
   835 
       
   836         Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
       
   837 
       
   838         return '_' . base_convert($segmentNameCounter, 10, 36);
       
   839     }
       
   840 
       
   841 }