|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @subpackage Index |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: Writer.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
21 */ |
|
22 |
|
23 |
|
24 /** Zend_Search_Lucene_LockManager */ |
|
25 require_once 'Zend/Search/Lucene/LockManager.php'; |
|
26 |
|
27 |
|
28 /** |
|
29 * @category Zend |
|
30 * @package Zend_Search_Lucene |
|
31 * @subpackage Index |
|
32 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
33 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
34 */ |
|
35 class Zend_Search_Lucene_Index_Writer |
|
36 { |
|
/**
 * @todo Implement Analyzer substitution
 * @todo Implement Zend_Search_Lucene_Storage_DirectoryRAM and Zend_Search_Lucene_Storage_FileRAM to use it for
 *       temporary index files
 * @todo Directory lock processing
 */

/**
 * Number of documents required before the buffered in-memory
 * documents are written into a new Segment
 *
 * addDocument() commits the current segment as soon as its document count
 * reaches this value; it is also the initial size threshold used by the
 * auto-merge procedure.
 *
 * Default value is 10
 *
 * @var integer
 */
public $maxBufferedDocs = 10;

/**
 * Largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * The auto-merge procedure stops as soon as its size threshold exceeds this value.
 *
 * Default value is PHP_INT_MAX
 *
 * @var integer
 */
public $maxMergeDocs = PHP_INT_MAX;

/**
 * Determines how often segment indices are merged by addDocument().
 *
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 *
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 *
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * The auto-merge size threshold is multiplied by this factor each time
 * a segment is too large for the current merging block.
 *
 * Default value is 10
 *
 * @var integer
 */
public $mergeFactor = 10;

/**
 * File system adapter.
 *
 * @var Zend_Search_Lucene_Storage_Directory
 */
private $_directory = null;


/**
 * Changes counter.
 *
 * Incremented by addDocument(); added to the index version and reset to zero
 * by _updateSegments().
 *
 * @var integer
 */
private $_versionUpdate = 0;

/**
 * List of the segments, created by index writer
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects, keyed by segment name.
 * Emptied by _updateSegments() once the segments are written to the segments file.
 *
 * @var array
 */
private $_newSegments = array();

/**
 * List of segments to be deleted on commit
 *
 * Segment names, stored as name => name.
 *
 * @var array
 */
private $_segmentsToDelete = array();

/**
 * Current segment to add documents
 *
 * Null until the first document is added after a commit.
 *
 * @var Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter
 */
private $_currentSegment = null;

/**
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects for this index.
 *
 * It's a reference to the corresponding Zend_Search_Lucene::$_segmentInfos array
 * (bound by reference in the constructor), keyed by segment name.
 *
 * @var array Zend_Search_Lucene_Index_SegmentInfo
 */
private $_segmentInfos;

/**
 * Index target format version
 *
 * Compared against the Zend_Search_Lucene::FORMAT_* constants when the segments file is written.
 *
 * @var integer
 */
private $_targetFormatVersion;

/**
 * List of index file extensions
 *
 * Keyed by the extension itself so that membership can be tested with isset().
 *
 * @var array
 */
private static $_indexExtensions = array('.cfs' => '.cfs',
                                         '.cfx' => '.cfx',
                                         '.fnm' => '.fnm',
                                         '.fdx' => '.fdx',
                                         '.fdt' => '.fdt',
                                         '.tis' => '.tis',
                                         '.tii' => '.tii',
                                         '.frq' => '.frq',
                                         '.prx' => '.prx',
                                         '.tvx' => '.tvx',
                                         '.tvd' => '.tvd',
                                         '.tvf' => '.tvf',
                                         '.del' => '.del',
                                         '.sti' => '.sti' );
|
158 |
|
159 |
|
/**
 * Create empty index
 *
 * Generation 0 produces the pre-2.1 layout: files that look like index files are
 * removed from the directory, then 'segments' and 'deletable' files are written.
 * Any other generation produces a 'segments.gen' file followed by the
 * generation-specific segments file.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param integer $generation
 * @param integer $nameCount   initial value of the segment name counter
 */
public static function createIndex(Zend_Search_Lucene_Storage_Directory $directory, $generation, $nameCount)
{
    $isLegacyLayout = ($generation == 0);

    if ($isLegacyLayout) {
        // Pre-2.1 mode: remove everything that looks like an index file first
        foreach ($directory->fileList() as $fileName) {
            $isIndexFile = $fileName == 'deletable'
                        || $fileName == 'segments'
                        || isset(self::$_indexExtensions[ substr($fileName, strlen($fileName)-4)])
                        || preg_match('/\.f\d+$/i', $fileName) /* <segment_name>.f<decimal_number> file names */;

            if ($isIndexFile) {
                $directory->deleteFile($fileName);
            }
        }

        $segmentsFileName = 'segments';
        $formatMarker     = (int)0xFFFFFFFF;
    } else {
        $genFile = $directory->createFile('segments.gen');
        $genFile->writeInt((int)0xFFFFFFFE);

        // The generation is stored twice
        for ($copy = 0; $copy < 2; $copy++) {
            $genFile->writeLong($generation);
        }

        $segmentsFileName = Zend_Search_Lucene::getSegmentFileName($generation);
        $formatMarker     = (int)0xFFFFFFFD;
    }

    // Segments file header: format marker, version, name counter, segment counter
    $segmentsFile = $directory->createFile($segmentsFileName);
    $segmentsFile->writeInt($formatMarker);
    // version (initialized by current time)
    $segmentsFile->writeLong(round(microtime(true)));
    $segmentsFile->writeInt($nameCount);
    // there are no segments yet
    $segmentsFile->writeInt(0);

    if ($isLegacyLayout) {
        // 'deletable' file holds only a zero counter
        $deletableFile = $directory->createFile('deletable');
        $deletableFile->writeInt(0);
    }
}
|
214 |
|
/**
 * Open the index for writing
 *
 * The segment list is bound by reference, so every change the writer makes to it
 * is visible through the array passed in by the caller.
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @param array $segmentInfos
 * @param integer $targetFormatVersion
 */
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, &$segmentInfos, $targetFormatVersion)
{
    $this->_targetFormatVersion = $targetFormatVersion;
    $this->_segmentInfos        = &$segmentInfos;
    $this->_directory           = $directory;
}
|
229 |
|
/**
 * Adds a document to this index.
 *
 * The document goes into the current in-memory segment (opened on demand).
 * Once that segment holds $maxBufferedDocs documents it is committed, after which
 * the auto-merge check runs and the pending version counter is increased.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    /** Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter */
    require_once 'Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php';

    // Open a fresh segment writer if none is active
    if ($this->_currentSegment === null) {
        $segmentName = $this->_newSegmentName();

        $this->_currentSegment =
            new Zend_Search_Lucene_Index_SegmentWriter_DocumentWriter($this->_directory, $segmentName);
    }

    $this->_currentSegment->addDocument($document);

    // Flush the buffered documents once the limit is reached
    $bufferIsFull = $this->_currentSegment->count() >= $this->maxBufferedDocs;
    if ($bufferIsFull) {
        $this->commit();
    }

    $this->_maybeMergeSegments();

    $this->_versionUpdate += 1;
}
|
254 |
|
255 |
|
/**
 * Check if we have anything to merge
 *
 * Walks the segments in ascending size order and groups them into merging blocks.
 * The block threshold starts at $maxBufferedDocs and is multiplied by $mergeFactor
 * each time a segment is too large for the current block. Returns true as soon as
 * a block has accumulated at least the threshold number of documents.
 *
 * Returns false when the threshold exceeds $maxMergeDocs, or when it cannot grow
 * (e.g. $mergeFactor is 1 or less), which would otherwise loop forever.
 *
 * @return boolean
 */
private function _hasAnythingToMerge()
{
    $segmentSizes = array();
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        $segmentSizes[$segName] = $segmentInfo->count();
    }
    asort($segmentSizes, SORT_NUMERIC);

    $poolSize    = 0;
    $sizeToMerge = $this->maxBufferedDocs;

    foreach ($segmentSizes as $size) {
        // Check, if segment comes into a new merging block
        while ($size >= $sizeToMerge) {
            // Previous block is large enough to be merged
            if ($poolSize >= $sizeToMerge) {
                return true;
            }
            $poolSize = 0;

            $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
            if ($nextSizeToMerge <= $sizeToMerge) {
                // Threshold cannot grow, so this segment would never leave the loop
                return false;
            }
            $sizeToMerge = $nextSizeToMerge;

            if ($sizeToMerge > $this->maxMergeDocs) {
                return false;
            }
        }

        $poolSize += $size;
    }

    return $poolSize >= $sizeToMerge;
}
|
299 |
|
/**
 * Merge segments if necessary
 *
 * Does nothing if the optimization lock cannot be obtained (another optimization
 * or auto-optimization process is running). The lock is always released before
 * leaving the method, including when merging throws.
 */
private function _maybeMergeSegments()
{
    if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
        return;
    }

    try {
        if ($this->_hasAnythingToMerge()) {
            // Update segments list to be sure all segments are not merged yet by another process
            //
            // Segment merging functionality is concentrated in this class and surrounded
            // by optimization lock obtaining/releasing.
            // _updateSegments() refreshes segments list from the latest index generation.
            // So only new segments can be added to the index while we are merging some already existing
            // segments.
            // Newly added segments will be also included into the index by the _updateSegments() call
            // either by another process or by the current process with the commit() call at the end of
            // _mergeSegments() method.
            // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
            $this->_updateSegments();

            // Perform standard auto-optimization procedure
            $this->_mergeSegmentPools();
        }
    } catch (Exception $e) {
        // Do not keep the optimization lock if merging failed
        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
}

/**
 * Group segments into merging blocks by size and merge every block that is large enough
 *
 * Segments are processed in ascending size order. The block threshold starts at
 * $maxBufferedDocs and is multiplied by $mergeFactor each time a segment is too large
 * for the current block. Processing stops when the threshold exceeds $maxMergeDocs,
 * or when it cannot grow (e.g. $mergeFactor is 1 or less), which would otherwise loop forever.
 *
 * Must be called with the optimization lock held.
 */
private function _mergeSegmentPools()
{
    $segmentSizes = array();
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        $segmentSizes[$segName] = $segmentInfo->count();
    }

    $mergePool   = array();
    $poolSize    = 0;
    $sizeToMerge = $this->maxBufferedDocs;
    asort($segmentSizes, SORT_NUMERIC);
    foreach ($segmentSizes as $segName => $size) {
        // Check, if segment comes into a new merging block
        while ($size >= $sizeToMerge) {
            // Merge previous block if it's large enough
            if ($poolSize >= $sizeToMerge) {
                $this->_mergeSegments($mergePool);
            }
            $mergePool = array();
            $poolSize  = 0;

            $nextSizeToMerge = $sizeToMerge * $this->mergeFactor;
            if ($nextSizeToMerge <= $sizeToMerge) {
                // Threshold cannot grow, so this segment would never leave the loop
                return;
            }
            $sizeToMerge = $nextSizeToMerge;

            if ($sizeToMerge > $this->maxMergeDocs) {
                return;
            }
        }

        $mergePool[] = $this->_segmentInfos[$segName];
        $poolSize += $size;
    }

    if ($poolSize >= $sizeToMerge) {
        $this->_mergeSegments($mergePool);
    }
}
|
364 |
|
/**
 * Merge specified segments
 *
 * Every given segment is fed into a segment merger and scheduled for deletion.
 * The merger result (if any) is registered as a new segment and the change is committed.
 *
 * $segments is an array of SegmentInfo objects
 *
 * @param array $segments
 */
private function _mergeSegments($segments)
{
    $mergedName = $this->_newSegmentName();

    /** Zend_Search_Lucene_Index_SegmentMerger */
    require_once 'Zend/Search/Lucene/Index/SegmentMerger.php';
    $segmentMerger = new Zend_Search_Lucene_Index_SegmentMerger($this->_directory, $mergedName);

    foreach ($segments as $sourceSegment) {
        $segmentMerger->addSource($sourceSegment);

        // Source segments are dropped from the index on the next segments file update
        $sourceName = $sourceSegment->getName();
        $this->_segmentsToDelete[$sourceName] = $sourceName;
    }

    $mergedSegment = $segmentMerger->merge();
    if ($mergedSegment !== null) {
        $this->_newSegments[$mergedSegment->getName()] = $mergedSegment;
    }

    $this->commit();
}
|
392 |
|
/**
 * Update segments file by adding current segment to a list
 *
 * Under the index write lock this method:
 *  - writes pending changes of the known segments;
 *  - creates the segments file of the next generation from the current one, skipping
 *    segments scheduled for deletion and appending newly created segments;
 *  - updates 'segments.gen' (the generation is written before and after the new
 *    segments file is completed);
 *  - deletes files which are no longer used, if the read lock can be escalated
 *    (otherwise only purges the deleted segments' files through the directory);
 *  - synchronizes the in-memory segments list with the new segments file.
 *
 * If building the new segments file fails, the previous generation number is
 * restored in 'segments.gen' and the write lock is released before the exception is rethrown.
 *
 * @throws Zend_Search_Lucene_Exception
 */
private function _updateSegments()
{
    // Make sure the exception class is loaded for every throw below
    require_once 'Zend/Search/Lucene/Exception.php';

    // Get an exclusive index lock
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    // Write down changes for the segments
    foreach ($this->_segmentInfos as $segInfo) {
        $segInfo->writeChanges();
    }


    $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
    $segmentsFile   = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);
    $newSegmentFile = $this->_directory->createFile(Zend_Search_Lucene::getSegmentFileName(++$generation), false);

    try {
        $genFile = $this->_directory->getFileObject('segments.gen', false);
    } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') !== false) {
            $genFile = $this->_directory->createFile('segments.gen');
        } else {
            // Release index write lock, as every other error path of this method does
            Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
    }

    $genFile->writeInt((int)0xFFFFFFFE);
    // Write generation (first copy)
    $genFile->writeLong($generation);

    try {
        // Write format marker
        if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_1) {
            $newSegmentFile->writeInt((int)0xFFFFFFFD);
        } else if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
            $newSegmentFile->writeInt((int)0xFFFFFFFC);
        }

        // Read src file format identifier
        $format = $segmentsFile->readInt();
        if ($format == (int)0xFFFFFFFF) {
            $srcFormat = Zend_Search_Lucene::FORMAT_PRE_2_1;
        } else if ($format == (int)0xFFFFFFFD) {
            $srcFormat = Zend_Search_Lucene::FORMAT_2_1;
        } else if ($format == (int)0xFFFFFFFC) {
            $srcFormat = Zend_Search_Lucene::FORMAT_2_3;
        } else {
            throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
        }

        // Index version is advanced by the number of changes made since the last update
        $version = $segmentsFile->readLong() + $this->_versionUpdate;
        $this->_versionUpdate = 0;
        $newSegmentFile->writeLong($version);

        // Write segment name counter
        $newSegmentFile->writeInt($segmentsFile->readInt());

        // Get number of segments offset
        $numOfSegmentsOffset = $newSegmentFile->tell();
        // Write dummy data (segment counter), the real value is written at the end
        $newSegmentFile->writeInt(0);

        // Read number of segments
        $segmentsCount = $segmentsFile->readInt();

        // Segments kept in the new generation (name => size)
        $segments = array();
        for ($count = 0; $count < $segmentsCount; $count++) {
            $segName = $segmentsFile->readString();
            $segSize = $segmentsFile->readInt();

            if ($srcFormat == Zend_Search_Lucene::FORMAT_PRE_2_1) {
                // pre-2.1 index format
                $delGen            = 0;
                $hasSingleNormFile = false;
                $numField          = (int)0xFFFFFFFF;
                $isCompoundByte    = 0;
                $docStoreOptions   = null;
            } else {
                $delGen = $segmentsFile->readLong();

                if ($srcFormat == Zend_Search_Lucene::FORMAT_2_3) {
                    $docStoreOffset = $segmentsFile->readInt();

                    if ($docStoreOffset != (int)0xFFFFFFFF) {
                        $docStoreSegment        = $segmentsFile->readString();
                        $docStoreIsCompoundFile = $segmentsFile->readByte();

                        $docStoreOptions = array('offset'     => $docStoreOffset,
                                                 'segment'    => $docStoreSegment,
                                                 'isCompound' => ($docStoreIsCompoundFile == 1));
                    } else {
                        $docStoreOptions = null;
                    }
                } else {
                    $docStoreOptions = null;
                }

                $hasSingleNormFile = $segmentsFile->readByte();
                $numField          = $segmentsFile->readInt();

                $normGens = array();
                if ($numField != (int)0xFFFFFFFF) {
                    for ($count1 = 0; $count1 < $numField; $count1++) {
                        $normGens[] = $segmentsFile->readLong();
                    }
                }
                $isCompoundByte = $segmentsFile->readByte();
            }

            if (!in_array($segName, $this->_segmentsToDelete)) {
                // Load segment if necessary
                if (!isset($this->_segmentInfos[$segName])) {
                    if ($isCompoundByte == 0xFF) {
                        // The segment is not a compound file
                        $isCompound = false;
                    } else if ($isCompoundByte == 0x00) {
                        // The status is unknown
                        $isCompound = null;
                    } else if ($isCompoundByte == 0x01) {
                        // The segment is a compound file
                        $isCompound = true;
                    } else {
                        // Unexpected flag value: treat the status as unknown instead of
                        // reusing a value left over from a previous segment
                        $isCompound = null;
                    }

                    /** Zend_Search_Lucene_Index_SegmentInfo */
                    require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
                    $this->_segmentInfos[$segName] =
                                new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                         $segName,
                                                                         $segSize,
                                                                         $delGen,
                                                                         $docStoreOptions,
                                                                         $hasSingleNormFile,
                                                                         $isCompound);
                } else {
                    // Retrieve actual deletions file generation number
                    $delGen = $this->_segmentInfos[$segName]->getDelGen();
                }

                $newSegmentFile->writeString($segName);
                $newSegmentFile->writeInt($segSize);
                $newSegmentFile->writeLong($delGen);
                if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                    if ($docStoreOptions !== null) {
                        $newSegmentFile->writeInt($docStoreOffset);
                        $newSegmentFile->writeString($docStoreSegment);
                        $newSegmentFile->writeByte($docStoreIsCompoundFile);
                    } else {
                        // Set DocStoreOffset to -1
                        $newSegmentFile->writeInt((int)0xFFFFFFFF);
                    }
                } else if ($docStoreOptions !== null) {
                    // The enclosing catch block restores the generation and releases the write lock
                    throw new Zend_Search_Lucene_Exception('Index conversion to lower format version is not supported.');
                }

                $newSegmentFile->writeByte($hasSingleNormFile);
                $newSegmentFile->writeInt($numField);
                if ($numField != (int)0xFFFFFFFF) {
                    foreach ($normGens as $normGen) {
                        $newSegmentFile->writeLong($normGen);
                    }
                }
                $newSegmentFile->writeByte($isCompoundByte);

                $segments[$segName] = $segSize;
            }
        }
        $segmentsFile->close();

        $segmentsCount = count($segments) + count($this->_newSegments);

        foreach ($this->_newSegments as $segName => $segmentInfo) {
            $newSegmentFile->writeString($segName);
            $newSegmentFile->writeInt($segmentInfo->count());

            // delete file generation: -1 (there is no delete file yet),
            // written as two 32-bit halves
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            if ($this->_targetFormatVersion == Zend_Search_Lucene::FORMAT_2_3) {
                // docStoreOffset: -1 (segment doesn't use shared doc store)
                $newSegmentFile->writeInt((int)0xFFFFFFFF);
            }
            // HasSingleNormFile
            $newSegmentFile->writeByte($segmentInfo->hasSingleNormFile());
            // NumField
            $newSegmentFile->writeInt((int)0xFFFFFFFF);
            // IsCompoundFile
            $newSegmentFile->writeByte($segmentInfo->isCompound() ? 1 : -1);

            $segments[$segmentInfo->getName()] = $segmentInfo->count();
            $this->_segmentInfos[$segName] = $segmentInfo;
        }
        $this->_newSegments = array();

        $newSegmentFile->seek($numOfSegmentsOffset);
        $newSegmentFile->writeInt($segmentsCount);  // Update segments count
        $newSegmentFile->close();
    } catch (Exception $e) {
        /** Restore previous index generation */
        $generation--;
        $genFile->seek(4, SEEK_SET);
        // Write generation number twice
        $genFile->writeLong($generation);
        $genFile->writeLong($generation);

        // Release index write lock
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

        // Throw the exception
        throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
    }

    // Write generation (second copy)
    $genFile->writeLong($generation);


    // Check if another update or read process is not running now
    // If yes, skip clean-up procedure
    if (Zend_Search_Lucene_LockManager::escalateReadLock($this->_directory)) {
        /**
         * Clean-up directory
         */
        $filesToDelete = array();
        $filesTypes    = array();
        $filesNumbers  = array();

        // list of .del files of currently used segments
        // each segment can have several generations of .del files
        // only last should not be deleted
        $delFiles = array();

        foreach ($this->_directory->fileList() as $file) {
            if ($file == 'deletable') {
                // 'deletable' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 0; // delete this file first, since it's not used starting from Lucene v2.1
                $filesNumbers[]  = 0;
            } else if ($file == 'segments') {
                // 'segments' file
                $filesToDelete[] = $file;
                $filesTypes[]    = 1; // second file to be deleted "zero" version of segments file (Lucene pre-2.1)
                $filesNumbers[]  = 0;
            } else if (preg_match('/^segments_[a-zA-Z0-9]+$/i', $file)) {
                // 'segments_xxx' file
                // Check if it's not a just created generation file
                if ($file != Zend_Search_Lucene::getSegmentFileName($generation)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 2; // first group of files for deletions
                    $filesNumbers[]  = (int)base_convert(substr($file, 9), 36, 10); // ordered by segment generation numbers
                }
            } else if (preg_match('/(^_([a-zA-Z0-9]+))\.f\d+$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>.f<decimal_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                }
            } else if (preg_match('/(^_([a-zA-Z0-9]+))(_([a-zA-Z0-9]+))\.del$/i', $file, $matches)) {
                // one of per segment files ('<segment_name>_<del_generation>.del' where <segment_name> is '_<segment_number>')
                // Check if it's not one of the segments in the current segments set
                if (!isset($segments[$matches[1]])) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert($matches[2], 36, 10); // order by segment number
                } else {
                    $segmentNumber = (int)base_convert($matches[2], 36, 10);
                    $delGeneration = (int)base_convert($matches[4], 36, 10);
                    if (!isset($delFiles[$segmentNumber])) {
                        $delFiles[$segmentNumber] = array();
                    }
                    $delFiles[$segmentNumber][$delGeneration] = $file;
                }
            } else if (isset(self::$_indexExtensions[substr($file, strlen($file)-4)])) {
                // one of per segment files ('<segment_name>.<ext>')
                $segmentName = substr($file, 0, strlen($file) - 4);
                // Check if it's not one of the segments in the current segments set
                // and not the segment which is being written right now
                if (!isset($segments[$segmentName])  &&
                    ($this->_currentSegment === null  ||  $this->_currentSegment->getName() != $segmentName)) {
                    $filesToDelete[] = $file;
                    $filesTypes[]    = 3; // second group of files for deletions
                    $filesNumbers[]  = (int)base_convert(substr($file, 1 /* skip '_' */, strlen($file)-5), 36, 10); // order by segment number
                }
            }
        }

        $maxGenNumber = 0;
        // process .del files of currently used segments
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            ksort($delFiles[$segmentNumber], SORT_NUMERIC);
            array_pop($delFiles[$segmentNumber]); // remove last delete file generation from candidates for deleting

            end($delFiles[$segmentNumber]);
            $lastGenNumber = key($delFiles[$segmentNumber]);
            if ($lastGenNumber > $maxGenNumber) {
                $maxGenNumber = $lastGenNumber;
            }
        }
        foreach ($delFiles as $segmentNumber => $segmentDelFiles) {
            foreach ($segmentDelFiles as $delGeneration => $file) {
                $filesToDelete[] = $file;
                $filesTypes[]    = 4; // third group of files for deletions
                $filesNumbers[]  = $segmentNumber*$maxGenNumber + $delGeneration; // order by <segment_number>,<del_generation> pair
            }
        }

        // Reorder files for deleting
        array_multisort($filesTypes,    SORT_ASC, SORT_NUMERIC,
                        $filesNumbers,  SORT_ASC, SORT_NUMERIC,
                        $filesToDelete, SORT_ASC, SORT_STRING);

        foreach ($filesToDelete as $file) {
            try {
                /** Skip shared docstore segments deleting */
                /** @todo Process '.cfx' files to check if them are already unused */
                if (substr($file, strlen($file)-4) != '.cfx') {
                    $this->_directory->deleteFile($file);
                }
            } catch (Zend_Search_Lucene_Exception $e) {
                if (strpos($e->getMessage(), 'Can\'t delete file') === false) {
                    // That's not "file is under processing or already deleted" exception
                    // Pass it through
                    throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                }
            }
        }

        // Return read lock into the previous state
        Zend_Search_Lucene_LockManager::deEscalateReadLock($this->_directory);
    } else {
        // Only release resources if another index reader is running now
        foreach ($this->_segmentsToDelete as $segName) {
            foreach (self::$_indexExtensions as $ext) {
                $this->_directory->purgeFile($segName . $ext);
            }
        }
    }

    // Clean-up _segmentsToDelete container
    $this->_segmentsToDelete = array();


    // Release index write lock
    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    // Remove unused segments from segments list
    foreach ($this->_segmentInfos as $segName => $segmentInfo) {
        if (!isset($segments[$segName])) {
            unset($this->_segmentInfos[$segName]);
        }
    }
}
|
750 |
|
/**
 * Commit current changes
 *
 * Closes the segment currently being written (if any), registers the resulting
 * segment as a new one and updates the segments file.
 */
public function commit()
{
    $openSegment = $this->_currentSegment;

    if ($openSegment !== null) {
        $closedSegment = $openSegment->close();

        // close() may produce no segment at all
        if ($closedSegment !== null) {
            $this->_newSegments[$closedSegment->getName()] = $closedSegment;
        }

        $this->_currentSegment = null;
    }

    $this->_updateSegments();
}
|
766 |
|
767 |
|
/**
 * Merges the provided indexes into this index.
 *
 * NOTE: not implemented yet. The method body is empty, so a call currently
 * has no effect and $readers is ignored.
 *
 * @param array $readers
 * @return void
 */
public function addIndexes($readers)
{
    /**
     * @todo implementation
     */
}
|
780 |
|
/**
 * Merges all segments together into new one
 *
 * Returns true on success and false if another optimization or auto-optimization process
 * is running now
 *
 * The optimization lock is released before leaving the method, including when
 * updating or merging the segments throws.
 *
 * @return boolean
 */
public function optimize()
{
    if (Zend_Search_Lucene_LockManager::obtainOptimizationLock($this->_directory) === false) {
        return false;
    }

    try {
        // Update segments list to be sure all segments are not merged yet by another process
        //
        // Segment merging functionality is concentrated in this class and surrounded
        // by optimization lock obtaining/releasing.
        // _updateSegments() refreshes segments list from the latest index generation.
        // So only new segments can be added to the index while we are merging some already existing
        // segments.
        // Newly added segments will be also included into the index by the _updateSegments() call
        // either by another process or by the current process with the commit() call at the end of
        // _mergeSegments() method.
        // That's guaranteed by the serialisation of _updateSegments() execution using exclusive locks.
        $this->_updateSegments();

        $this->_mergeSegments($this->_segmentInfos);
    } catch (Exception $e) {
        // Do not keep the optimization lock if optimization failed
        Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseOptimizationLock($this->_directory);

    return true;
}
|
813 |
|
/**
 * Get name for new segment
 *
 * Under the index write lock, reads the segment name counter from the current
 * segments file, stores the incremented value back and returns the name built
 * from the value read: '_' followed by the counter in base 36.
 *
 * The write lock is released before leaving the method, including when
 * accessing the segments file throws.
 *
 * @return string
 */
private function _newSegmentName()
{
    Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);

    try {
        $generation = Zend_Search_Lucene::getActualGeneration($this->_directory);
        $segmentsFile = $this->_directory->getFileObject(Zend_Search_Lucene::getSegmentFileName($generation), false);

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentNameCounter = $segmentsFile->readInt();

        $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)
        $segmentsFile->writeInt($segmentNameCounter + 1);

        // Flush output to guarantee that wrong value will not be loaded between unlock and
        // return (which calls $segmentsFile destructor)
        $segmentsFile->flush();
    } catch (Exception $e) {
        // Do not keep the write lock if the counter could not be updated
        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
        throw $e;
    }

    Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);

    return '_' . base_convert($segmentNameCounter, 10, 36);
}
|
840 |
|
841 } |