|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
18 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
19 * @version $Id: Lucene.php 22987 2010-09-21 10:39:53Z alexander $ |
|
20 */ |
|
21 |
|
22 |
|
23 /** User land classes and interfaces turned on by Zend/Search/Lucene.php file inclusion. */ |
|
24 /** @todo Section should be removed with ZF 2.0 release as obsolete */ |
|
25 |
|
26 /** Zend_Search_Lucene_Document_Html */ |
|
27 require_once 'Zend/Search/Lucene/Document/Html.php'; |
|
28 |
|
29 /** Zend_Search_Lucene_Document_Docx */ |
|
30 require_once 'Zend/Search/Lucene/Document/Docx.php'; |
|
31 |
|
32 /** Zend_Search_Lucene_Document_Pptx */ |
|
33 require_once 'Zend/Search/Lucene/Document/Pptx.php'; |
|
34 |
|
35 /** Zend_Search_Lucene_Document_Xlsx */ |
|
36 require_once 'Zend/Search/Lucene/Document/Xlsx.php'; |
|
37 |
|
38 /** Zend_Search_Lucene_Search_QueryParser */ |
|
39 require_once 'Zend/Search/Lucene/Search/QueryParser.php'; |
|
40 |
|
41 /** Zend_Search_Lucene_Search_QueryHit */ |
|
42 require_once 'Zend/Search/Lucene/Search/QueryHit.php'; |
|
43 |
|
44 /** Zend_Search_Lucene_Analysis_Analyzer */ |
|
45 require_once 'Zend/Search/Lucene/Analysis/Analyzer.php'; |
|
46 |
|
47 /** Zend_Search_Lucene_Search_Query_Term */ |
|
48 require_once 'Zend/Search/Lucene/Search/Query/Term.php'; |
|
49 |
|
50 /** Zend_Search_Lucene_Search_Query_Phrase */ |
|
51 require_once 'Zend/Search/Lucene/Search/Query/Phrase.php'; |
|
52 |
|
53 /** Zend_Search_Lucene_Search_Query_MultiTerm */ |
|
54 require_once 'Zend/Search/Lucene/Search/Query/MultiTerm.php'; |
|
55 |
|
56 /** Zend_Search_Lucene_Search_Query_Wildcard */ |
|
57 require_once 'Zend/Search/Lucene/Search/Query/Wildcard.php'; |
|
58 |
|
59 /** Zend_Search_Lucene_Search_Query_Range */ |
|
60 require_once 'Zend/Search/Lucene/Search/Query/Range.php'; |
|
61 |
|
62 /** Zend_Search_Lucene_Search_Query_Fuzzy */ |
|
63 require_once 'Zend/Search/Lucene/Search/Query/Fuzzy.php'; |
|
64 |
|
65 /** Zend_Search_Lucene_Search_Query_Boolean */ |
|
66 require_once 'Zend/Search/Lucene/Search/Query/Boolean.php'; |
|
67 |
|
68 /** Zend_Search_Lucene_Search_Query_Empty */ |
|
69 require_once 'Zend/Search/Lucene/Search/Query/Empty.php'; |
|
70 |
|
71 /** Zend_Search_Lucene_Search_Query_Insignificant */ |
|
72 require_once 'Zend/Search/Lucene/Search/Query/Insignificant.php'; |
|
73 |
|
74 |
|
75 |
|
76 |
|
77 /** Internally used classes */ |
|
78 |
|
79 /** Zend_Search_Lucene_Interface */ |
|
80 require_once 'Zend/Search/Lucene/Interface.php'; |
|
81 |
|
82 /** Zend_Search_Lucene_Index_SegmentInfo */ |
|
83 require_once 'Zend/Search/Lucene/Index/SegmentInfo.php'; |
|
84 |
|
85 /** Zend_Search_Lucene_LockManager */ |
|
86 require_once 'Zend/Search/Lucene/LockManager.php'; |
|
87 |
|
88 |
|
89 /** |
|
90 * @category Zend |
|
91 * @package Zend_Search_Lucene |
|
92 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
93 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
94 */ |
|
95 class Zend_Search_Lucene implements Zend_Search_Lucene_Interface |
|
96 { |
|
/**
 * Default field name for search.
 *
 * Null means search through all fields.
 * Shared by all index instances (static); see setDefaultSearchField().
 *
 * @var string|null
 */
private static $_defaultSearchField = null;

/**
 * Result set limit.
 *
 * 0 means no limit.
 * Shared by all index instances (static); checked by find() while hits are collected.
 *
 * @var integer
 */
private static $_resultSetLimit = 0;

/**
 * Terms per query limit.
 *
 * 0 means no limit.
 * Shared by all index instances (static).
 *
 * @var integer
 */
private static $_termsPerQueryLimit = 1024;

/**
 * File system adapter.
 *
 * Set by the constructor; reset to null when the index is closed.
 *
 * @var Zend_Search_Lucene_Storage_Directory
 */
private $_directory = null;

/**
 * File system adapter closing option.
 *
 * The constructor sets it to true when it creates the directory object itself
 * (a string path was given) and to false when a directory object was passed in.
 *
 * @var boolean
 */
private $_closeDirOnExit = true;

/**
 * Writer for this index, not instantiated unless required.
 *
 * @var Zend_Search_Lucene_Index_Writer
 */
private $_writer = null;

/**
 * Array of Zend_Search_Lucene_Index_SegmentInfo objects for current version of index,
 * keyed by segment name.
 *
 * @var array Zend_Search_Lucene_Index_SegmentInfo
 */
private $_segmentInfos = array();

/**
 * Number of documents in this index (sum of the segment sizes read from the segments file).
 *
 * @var integer
 */
private $_docCount = 0;

/**
 * Flag for index changes
 *
 * @var boolean
 */
private $_hasChanges = false;


/**
 * Signal, that index is already closed, changes are fixed and resources are cleaned up
 *
 * @var boolean
 */
private $_closed = false;

/**
 * Number of references to the index object.
 *
 * Maintained by addReference() / removeReference().
 *
 * @var integer
 */
private $_refCount = 0;

/**
 * Current segment generation
 *
 * @var integer
 */
private $_generation;

// Index format identifiers; these are the only values accepted by setFormatVersion()
const FORMAT_PRE_2_1 = 0;
const FORMAT_2_1     = 1;
const FORMAT_2_3     = 2;


/**
 * Index format version (one of the FORMAT_* constants)
 *
 * @var integer
 */
private $_formatVersion;
|
199 |
|
/**
 * Create a new index and return it wrapped in a proxy object.
 *
 * The index object is constructed with the "create" flag turned on.
 *
 * @param mixed $directory Passed unchanged to the Zend_Search_Lucene constructor
 * @return Zend_Search_Lucene_Interface
 */
public static function create($directory)
{
    /** Zend_Search_Lucene_Proxy */
    require_once 'Zend/Search/Lucene/Proxy.php';

    $index = new Zend_Search_Lucene($directory, true);

    return new Zend_Search_Lucene_Proxy($index);
}
|
213 |
|
/**
 * Open an existing index and return it wrapped in a proxy object.
 *
 * The index object is constructed with the "create" flag turned off.
 *
 * @param mixed $directory Passed unchanged to the Zend_Search_Lucene constructor
 * @return Zend_Search_Lucene_Interface
 */
public static function open($directory)
{
    /** Zend_Search_Lucene_Proxy */
    require_once 'Zend/Search/Lucene/Proxy.php';

    $index = new Zend_Search_Lucene($directory, false);

    return new Zend_Search_Lucene_Proxy($index);
}
|
227 |
|
/** Generation retrieving counter: maximum number of attempts getActualGeneration() makes to read segments.gen */
const GENERATION_RETRIEVE_COUNT = 10;

/** Pause between generation retrieving attempts in milliseconds (converted to microseconds for usleep()) */
const GENERATION_RETRIEVE_PAUSE = 50;
|
233 |
|
/**
 * Get current generation number
 *
 * Returns generation number
 * 0 means pre-2.1 index format
 * -1 means there are no segments files.
 *
 * Zend_Search_Lucene uses the segments.gen file to retrieve the current
 * generation number. Apache Lucene index format documentation mentions this
 * method only as a fallback method; nevertheless it is used here because of
 * performance considerations.
 *
 * The file is read up to GENERATION_RETRIEVE_COUNT times, pausing
 * GENERATION_RETRIEVE_PAUSE milliseconds after each attempt in which the two
 * stored generation values differ.
 *
 * @todo check if we can use some modification of Apache Lucene generation determination algorithm
 *       without performance problems
 *
 * @param Zend_Search_Lucene_Storage_Directory $directory
 * @return integer
 * @throws Zend_Search_Lucene_Exception  If segments.gen has a wrong format marker, if no
 *                                       consistent generation could be read in any attempt,
 *                                       or if a file could not be opened for a reason other
 *                                       than "is not readable"
 */
public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
{
    require_once 'Zend/Search/Lucene/Exception.php';

    try {
        for ($count = 0; $count < self::GENERATION_RETRIEVE_COUNT; $count++) {
            // Try to get generation file
            $genFile = $directory->getFileObject('segments.gen', false);

            $format = $genFile->readInt();
            if ($format != (int)0xFFFFFFFE) {
                throw new Zend_Search_Lucene_Exception('Wrong segments.gen file format');
            }

            // The generation is stored twice; differing values mean the read is not usable yet
            $gen1 = $genFile->readLong();
            $gen2 = $genFile->readLong();

            if ($gen1 == $gen2) {
                return $gen1;
            }

            usleep(self::GENERATION_RETRIEVE_PAUSE * 1000);
        }

        // All passes are failed
        throw new Zend_Search_Lucene_Exception('Index is under processing now');
    } catch (Zend_Search_Lucene_Exception $e) {
        // Only an unreadable segments.gen falls through to the old-style lookup below
        if (strpos($e->getMessage(), 'is not readable') === false) {
            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
    }

    try {
        // Try to open old style segments file; only its availability matters here
        $directory->getFileObject('segments', false);

        // It's pre-2.1 index
        return 0;
    } catch (Zend_Search_Lucene_Exception $e) {
        if (strpos($e->getMessage(), 'is not readable') === false) {
            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
        }
    }

    // Neither segments.gen nor segments is readable: there is no index
    return -1;
}
|
303 |
|
/**
 * Get generation number associated with this index instance
 *
 * The same generation number, paired with a document number or a query string,
 * guarantees the same result when retrieving from the index, so it may be
 * used as part of a key for search result caching.
 *
 * @return integer
 */
public function getGeneration()
{
    $generation = $this->_generation;

    return $generation;
}
|
317 |
|
318 |
|
/**
 * Get segments file name
 *
 * Generation 0 maps to the plain 'segments' file; any other generation maps
 * to 'segments_' followed by the generation number written in base 36.
 *
 * @param integer $generation
 * @return string
 */
public static function getSegmentFileName($generation)
{
    return ($generation == 0)
         ? 'segments'
         : 'segments_' . base_convert($generation, 10, 36);
}
|
333 |
|
/**
 * Get index format version
 *
 * @return integer  Value stored by setFormatVersion() or while the segments file was read
 */
public function getFormatVersion()
{
    $formatVersion = $this->_formatVersion;

    return $formatVersion;
}
|
343 |
|
/**
 * Set index format version.
 * Index is converted to this format at the nearest update time
 *
 * @param int $formatVersion  One of the FORMAT_PRE_2_1, FORMAT_2_1 or FORMAT_2_3 constants
 * @throws Zend_Search_Lucene_Exception  If the value matches none of the supported formats
 */
public function setFormatVersion($formatVersion)
{
    $supportedFormats = array(self::FORMAT_PRE_2_1,
                              self::FORMAT_2_1,
                              self::FORMAT_2_3);

    // Non-strict membership test on purpose: values are compared loosely
    if (!in_array($formatVersion, $supportedFormats)) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Unsupported index format');
    }

    $this->_formatVersion = $formatVersion;
}
|
362 |
|
/**
 * Read segments file for pre-2.1 Lucene index format
 *
 * Fills $this->_segmentInfos (keyed by segment name), recomputes
 * $this->_docCount and sets the target format version to 2.1.
 *
 * @throws Zend_Search_Lucene_Exception  If the file does not start with the expected format marker
 */
private function _readPre21SegmentsFile()
{
    $file = $this->_directory->getFileObject('segments');

    if ($file->readInt() != (int)0xFFFFFFFF) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Wrong segments file format');
    }

    // Skip values which are not used here: index version, then segment name counter
    $file->readLong();
    $file->readInt();

    $segmentCount = $file->readInt();

    $this->_docCount = 0;

    // Each segment record is a name followed by the segment size
    for ($remaining = $segmentCount; $remaining > 0; $remaining--) {
        $name = $file->readString();
        $size = $file->readInt();

        $this->_docCount += $size;

        $this->_segmentInfos[$name] = new Zend_Search_Lucene_Index_SegmentInfo($this->_directory, $name, $size);
    }

    // Use 2.1 as a target version. Index will be reorganized at update time.
    $this->_formatVersion = self::FORMAT_2_1;
}
|
404 |
|
/**
 * Read segments file
 *
 * Reads the segments file of the current generation, detects the index format
 * (marker 0xFFFFFFFC => 2.3, 0xFFFFFFFD => 2.1), fills $this->_segmentInfos
 * (keyed by segment name) and recomputes $this->_docCount.
 *
 * @throws Zend_Search_Lucene_Exception  If the format marker is not supported or
 *                                       a segment uses separate norm files
 */
private function _readSegmentsFile()
{
    $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));

    $format = $segmentsFile->readInt();

    if ($format == (int)0xFFFFFFFC) {
        $this->_formatVersion = self::FORMAT_2_3;
    } else if ($format == (int)0xFFFFFFFD) {
        $this->_formatVersion = self::FORMAT_2_1;
    } else {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Unsupported segments file format');
    }

    // read version
    $segmentsFile->readLong();

    // read segment name counter
    $segmentsFile->readInt();

    $segments = $segmentsFile->readInt();

    $this->_docCount = 0;

    // read segmentInfos
    for ($count = 0; $count < $segments; $count++) {
        $segName = $segmentsFile->readString();
        $segSize = $segmentsFile->readInt();

        // 2.1+ specific properties
        $delGen = $segmentsFile->readLong();

        // Shared doc store description is present only in the 2.3 format,
        // and only when the stored offset is not the 0xFFFFFFFF marker
        $docStoreOptions = null;
        if ($this->_formatVersion == self::FORMAT_2_3) {
            $docStoreOffset = $segmentsFile->readInt();

            if ($docStoreOffset != (int)0xFFFFFFFF) {
                $docStoreSegment        = $segmentsFile->readString();
                $docStoreIsCompoundFile = $segmentsFile->readByte();

                $docStoreOptions = array('offset'     => $docStoreOffset,
                                         'segment'    => $docStoreSegment,
                                         'isCompound' => ($docStoreIsCompoundFile == 1));
            }
        }

        $hasSingleNormFile = $segmentsFile->readByte();
        $numField          = $segmentsFile->readInt();

        if ($numField != (int)0xFFFFFFFF) {
            // Per-field norm generations follow in the file, but reading them is pointless:
            // such an index is rejected unconditionally.
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Separate norm files are not supported. Optimize index to use it with Zend_Search_Lucene.');
        }

        $isCompoundByte = $segmentsFile->readByte();

        if ($isCompoundByte == 0xFF) {
            // The segment is not a compound file
            $isCompound = false;
        } else if ($isCompoundByte == 0x01) {
            // The segment is a compound file
            $isCompound = true;
        } else {
            // 0x00: the status is unknown.
            // Any unexpected value is treated as unknown too, so that the flag is always
            // defined and never carried over from the previously read segment.
            $isCompound = null;
        }

        $this->_docCount += $segSize;

        $this->_segmentInfos[$segName] =
                            new Zend_Search_Lucene_Index_SegmentInfo($this->_directory,
                                                                     $segName,
                                                                     $segSize,
                                                                     $delGen,
                                                                     $docStoreOptions,
                                                                     $hasSingleNormFile,
                                                                     $isCompound);
    }
}
|
498 |
|
/**
 * Opens the index.
 *
 * IndexReader constructor needs Directory as a parameter. It should be
 * a string with a path to the index folder or a Directory object.
 *
 * A read lock is obtained on the directory before anything is read. When
 * $create is true, a new (empty) index generation is written under a write
 * lock first, and the freshly written segments file is loaded afterwards.
 *
 * @param Zend_Search_Lucene_Storage_Directory_Filesystem|string $directory
 * @param boolean $create  True to create a new index generation in the directory
 * @throws Zend_Search_Lucene_Exception  If no directory is given, the write lock cannot be
 *                                       obtained, or no index exists in the directory
 */
public function __construct($directory = null, $create = false)
{
    if ($directory === null) {
        require_once 'Zend/Search/Lucene/Exception.php';
        // Same exception class as declared in the docblock and used by the rest of this class
        throw new Zend_Search_Lucene_Exception('No index directory specified');
    }

    if (is_string($directory)) {
        // The directory object is created here, so it is also closed by this index
        require_once 'Zend/Search/Lucene/Storage/Directory/Filesystem.php';
        $this->_directory      = new Zend_Search_Lucene_Storage_Directory_Filesystem($directory);
        $this->_closeDirOnExit = true;
    } else {
        $this->_directory      = $directory;
        $this->_closeDirOnExit = false;
    }

    $this->_segmentInfos = array();

    // Mark index as "under processing" to prevent other processes from premature index cleaning
    Zend_Search_Lucene_LockManager::obtainReadLock($this->_directory);

    $this->_generation = self::getActualGeneration($this->_directory);

    if ($create) {
        require_once 'Zend/Search/Lucene/Exception.php';
        try {
            Zend_Search_Lucene_LockManager::obtainWriteLock($this->_directory);
        } catch (Zend_Search_Lucene_Exception $e) {
            Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);

            if (strpos($e->getMessage(), 'Can\'t obtain exclusive index lock') === false) {
                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
            } else {
                throw new Zend_Search_Lucene_Exception('Can\'t create index. It\'s under processing now', 0, $e);
            }
        }

        if ($this->_generation == -1) {
            // Directory doesn't contain existing index, start from 1
            $this->_generation = 1;
            $nameCounter = 0;
        } else {
            // Directory contains existing index
            $segmentsFile = $this->_directory->getFileObject(self::getSegmentFileName($this->_generation));
            $segmentsFile->seek(12); // 12 = 4 (int, file format marker) + 8 (long, index version)

            // Continue segment naming from the existing counter and move to the next generation
            $nameCounter = $segmentsFile->readInt();
            $this->_generation++;
        }

        require_once 'Zend/Search/Lucene/Index/Writer.php';
        Zend_Search_Lucene_Index_Writer::createIndex($this->_directory, $this->_generation, $nameCounter);

        Zend_Search_Lucene_LockManager::releaseWriteLock($this->_directory);
    }

    if ($this->_generation == -1) {
        // NOTE(review): the read lock obtained above is not released on this path - confirm this is intended
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Index doesn\'t exists in the specified directory.');
    } else if ($this->_generation == 0) {
        $this->_readPre21SegmentsFile();
    } else {
        $this->_readSegmentsFile();
    }
}
|
573 |
|
/**
 * Close current index and free resources
 *
 * Commits pending changes, releases the read lock, closes the directory if
 * it was created by this object and drops the internal references.
 * Calling it again on an already closed index does nothing.
 */
private function _close()
{
    if (!$this->_closed) {
        $this->commit();

        // Release "under processing" flag
        Zend_Search_Lucene_LockManager::releaseReadLock($this->_directory);

        if ($this->_closeDirOnExit) {
            $this->_directory->close();
        }

        $this->_directory    = null;
        $this->_writer       = null;
        $this->_segmentInfos = null;

        // From now on the index is closed and resources are cleaned up
        $this->_closed = true;
    }
}
|
599 |
|
/**
 * Add reference to the index object
 *
 * Increments the internal reference counter by one.
 *
 * @internal
 */
public function addReference()
{
    $this->_refCount += 1;
}
|
609 |
|
/**
 * Remove reference from the index object
 *
 * When reference count becomes zero, index is closed and resources are cleaned up
 *
 * @internal
 */
public function removeReference()
{
    // Decrement first, then test the new value
    if (--$this->_refCount == 0) {
        $this->_close();
    }
}
|
625 |
|
/**
 * Object destructor
 *
 * Closes the index; _close() itself ignores repeated calls.
 */
public function __destruct()
{
    $this->_close();
}
|
633 |
|
/**
 * Returns an instance of Zend_Search_Lucene_Index_Writer for the index
 *
 * The writer is created on first use and reused afterwards.
 *
 * @return Zend_Search_Lucene_Index_Writer
 */
private function _getIndexWriter()
{
    if ($this->_writer !== null) {
        return $this->_writer;
    }

    require_once 'Zend/Search/Lucene/Index/Writer.php';
    $this->_writer = new Zend_Search_Lucene_Index_Writer($this->_directory, $this->_segmentInfos, $this->_formatVersion);

    return $this->_writer;
}
|
650 |
|
651 |
|
/**
 * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
 *
 * @return Zend_Search_Lucene_Storage_Directory  Null once the index has been closed
 */
public function getDirectory()
{
    $directory = $this->_directory;

    return $directory;
}
|
661 |
|
662 |
|
/**
 * Returns the total number of documents in this index (including deleted documents).
 *
 * @return integer
 */
public function count()
{
    $documentCount = $this->_docCount;

    return $documentCount;
}
|
672 |
|
/**
 * Returns one greater than the largest possible document number.
 * This may be used to, e.g., determine how big to allocate a structure which will have
 * an element for every document number in an index.
 *
 * The value is the one reported by count().
 *
 * @return integer
 */
public function maxDoc()
{
    $upperBound = $this->count();

    return $upperBound;
}
|
684 |
|
/**
 * Returns the total number of non-deleted documents in this index.
 *
 * Sums the numDocs() value of every segment.
 *
 * @return integer
 */
public function numDocs()
{
    $liveDocs = 0;

    foreach ($this->_segmentInfos as $segment) {
        $liveDocs = $liveDocs + $segment->numDocs();
    }

    return $liveDocs;
}
|
700 |
|
/**
 * Checks, that document is deleted
 *
 * Pending changes are committed first. The segment containing the document
 * is located by accumulating segment sizes, and the check is delegated to
 * that segment with the id made relative to the segment start.
 *
 * @param integer $id
 * @return boolean
 * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
 */
public function isDeleted($id)
{
    $this->commit();

    // Negative ids are out of the range too; they used to slip through the upper-bound check
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        $segmentSize = $segmentInfo->count();

        if ($segmentStartId + $segmentSize > $id) {
            // $id falls into this segment
            break;
        }

        $segmentStartId += $segmentSize;
    }

    return $segmentInfo->isDeleted($id - $segmentStartId);
}
|
728 |
|
/**
 * Set default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * Default value is null. The setting is static, i.e. shared by all index instances.
 *
 * @param string|null $fieldName
 */
public static function setDefaultSearchField($fieldName)
{
    self::$_defaultSearchField = $fieldName;
}
|
742 |
|
/**
 * Get default search field.
 *
 * Null means, that search is performed through all fields by default
 *
 * @return string|null
 */
public static function getDefaultSearchField()
{
    $fieldName = self::$_defaultSearchField;

    return $fieldName;
}
|
754 |
|
/**
 * Set result set limit.
 *
 * 0 (default) means no limit. The setting is static, i.e. shared by all index instances.
 *
 * @param integer $limit
 */
public static function setResultSetLimit($limit)
{
    self::$_resultSetLimit = $limit;
}
|
766 |
|
/**
 * Get result set limit.
 *
 * 0 means no limit
 *
 * @return integer
 */
public static function getResultSetLimit()
{
    $limit = self::$_resultSetLimit;

    return $limit;
}
|
778 |
|
/**
 * Set terms per query limit.
 *
 * 0 means no limit. The initial value is 1024.
 * The setting is static, i.e. shared by all index instances.
 *
 * @param integer $limit
 */
public static function setTermsPerQueryLimit($limit)
{
    self::$_termsPerQueryLimit = $limit;
}
|
790 |
|
/**
 * Get terms per query limit.
 *
 * 0 means no limit. The initial value is 1024.
 *
 * @return integer
 */
public static function getTermsPerQueryLimit()
{
    $limit = self::$_termsPerQueryLimit;

    return $limit;
}
|
802 |
|
/**
 * Retrieve index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is read from the index writer, which is created if it does not exist yet.
 *
 * @return integer
 */
public function getMaxBufferedDocs()
{
    $writer = $this->_getIndexWriter();

    return $writer->maxBufferedDocs;
}
|
817 |
|
/**
 * Set index maxBufferedDocs option
 *
 * maxBufferedDocs is a minimal number of documents required before
 * the buffered in-memory documents are written into a new Segment
 *
 * Default value is 10
 *
 * The value is stored on the index writer, which is created if it does not exist yet.
 *
 * @param integer $maxBufferedDocs
 */
public function setMaxBufferedDocs($maxBufferedDocs)
{
    $writer = $this->_getIndexWriter();

    $writer->maxBufferedDocs = $maxBufferedDocs;
}
|
832 |
|
/**
 * Retrieve index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is read from the index writer, which is created if it does not exist yet.
 *
 * @return integer
 */
public function getMaxMergeDocs()
{
    $writer = $this->_getIndexWriter();

    return $writer->maxMergeDocs;
}
|
849 |
|
/**
 * Set index maxMergeDocs option
 *
 * maxMergeDocs is a largest number of documents ever merged by addDocument().
 * Small values (e.g., less than 10,000) are best for interactive indexing,
 * as this limits the length of pauses while indexing to a few seconds.
 * Larger values are best for batched indexing and speedier searches.
 *
 * Default value is PHP_INT_MAX
 *
 * The value is stored on the index writer, which is created if it does not exist yet.
 *
 * @param integer $maxMergeDocs
 */
public function setMaxMergeDocs($maxMergeDocs)
{
    $writer = $this->_getIndexWriter();

    $writer->maxMergeDocs = $maxMergeDocs;
}
|
866 |
|
/**
 * Retrieve index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is read from the index writer, which is created if it does not exist yet.
 *
 * @return integer
 */
public function getMergeFactor()
{
    $writer = $this->_getIndexWriter();

    return $writer->mergeFactor;
}
|
888 |
|
/**
 * Set index mergeFactor option
 *
 * mergeFactor determines how often segment indices are merged by addDocument().
 * With smaller values, less RAM is used while indexing,
 * and searches on unoptimized indices are faster,
 * but indexing speed is slower.
 * With larger values, more RAM is used during indexing,
 * and while searches on unoptimized indices are slower,
 * indexing is faster.
 * Thus larger values (> 10) are best for batch index creation,
 * and smaller values (< 10) for indices that are interactively maintained.
 *
 * Default value is 10
 *
 * The value is stored on the index writer, which is created if it does not exist yet.
 *
 * @param integer $mergeFactor
 */
public function setMergeFactor($mergeFactor)
{
    $writer = $this->_getIndexWriter();

    $writer->mergeFactor = $mergeFactor;
}
|
910 |
|
/**
 * Performs a query against the index and returns an array
 * of Zend_Search_Lucene_Search_QueryHit objects.
 * Input is a string or Zend_Search_Lucene_Search_Query.
 *
 * A string query is parsed with Zend_Search_Lucene_Search_QueryParser.
 * Pending index changes are committed before the query is executed.
 * Documents with a zero score are skipped; if the top score is greater
 * than 1, all hit scores are divided by it. Collecting hits stops as soon as
 * the result set limit (if any) is reached.
 *
 * Sorting: with a single argument, hits are sorted by score (descending) and
 * then by document id (ascending). Additional arguments define sort keys:
 * each key is a field name (or 'score'), optionally followed by one or two
 * integer array_multisort() flags. If no flag follows a field name, SORT_ASC
 * and SORT_REGULAR are used. Document id (ascending) is always the last key.
 *
 * @param Zend_Search_Lucene_Search_Query|string $query
 * @return array Zend_Search_Lucene_Search_QueryHit
 * @throws Zend_Search_Lucene_Exception  If $query has a wrong type, a sort field name is
 *                                       not a string, or a sort field doesn't exist in the index
 */
public function find($query)
{
    if (is_string($query)) {
        require_once 'Zend/Search/Lucene/Search/QueryParser.php';

        $query = Zend_Search_Lucene_Search_QueryParser::parse($query);
    }

    if (!$query instanceof Zend_Search_Lucene_Search_Query) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Query must be a string or Zend_Search_Lucene_Search_Query object');
    }

    $this->commit();

    // Three parallel arrays: array_multisort() reorders them together
    $hits   = array();
    $scores = array();
    $ids    = array();

    $query = $query->rewrite($this)->optimize($this);

    $query->execute($this);

    $topScore = 0;

    /** Zend_Search_Lucene_Search_QueryHit */
    require_once 'Zend/Search/Lucene/Search/QueryHit.php';

    foreach ($query->matchedDocs() as $id => $num) {
        $docScore = $query->score($id, $this);
        if ($docScore != 0) {
            $hit = new Zend_Search_Lucene_Search_QueryHit($this);
            $hit->id    = $id;
            $hit->score = $docScore;

            $hits[]   = $hit;
            $ids[]    = $id;
            $scores[] = $docScore;

            if ($docScore > $topScore) {
                $topScore = $docScore;
            }
        }

        if (self::$_resultSetLimit != 0 && count($hits) >= self::$_resultSetLimit) {
            break;
        }
    }

    if (count($hits) == 0) {
        // skip sorting, which may cause a error on empty index
        return array();
    }

    if ($topScore > 1) {
        // Only the hit objects are normalized; $scores is used for ordering only
        foreach ($hits as $hit) {
            $hit->score /= $topScore;
        }
    }

    if (func_num_args() == 1) {
        // sort by scores
        array_multisort($scores, SORT_DESC, SORT_NUMERIC,
                        $ids,    SORT_ASC,  SORT_NUMERIC,
                        $hits);

        return $hits;
    }

    // sort by given field names

    $argList    = func_get_args();
    $argCount   = count($argList); // the argument list is not modified below
    $fieldNames = $this->getFieldNames();
    $sortArgs   = array();

    // PHP 5.3 now expects all arguments to array_multisort be passed by
    // reference (if it's invoked through call_user_func_array());
    // since constants can't be passed by reference, create some placeholder variables.
    $sortReg = SORT_REGULAR;
    $sortAsc = SORT_ASC;
    $sortNum = SORT_NUMERIC;

    $sortFieldValues = array();

    require_once 'Zend/Search/Lucene/Exception.php';
    for ($count = 1; $count < $argCount; $count++) {
        $fieldName = $argList[$count];

        if (!is_string($fieldName)) {
            throw new Zend_Search_Lucene_Exception('Field name must be a string.');
        }

        if (strtolower($fieldName) == 'score') {
            $sortArgs[] = &$scores;
        } else {
            // Strict comparison: numeric-looking names such as '1e1' and '10' must not match each other
            if (!in_array($fieldName, $fieldNames, true)) {
                throw new Zend_Search_Lucene_Exception('Wrong field name.');
            }

            if (!isset($sortFieldValues[$fieldName])) {
                $valuesArray = array();
                foreach ($hits as $hit) {
                    try {
                        $value = $hit->getDocument()->getFieldValue($fieldName);
                    } catch (Zend_Search_Lucene_Exception $e) {
                        if (strpos($e->getMessage(), 'not found') === false) {
                            throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
                        } else {
                            // The document has no such field: sort it as null
                            $value = null;
                        }
                    }

                    $valuesArray[] = $value;
                }

                // Collect loaded values in $sortFieldValues
                // Required for PHP 5.3 which translates references into values when source
                // variable is destroyed
                $sortFieldValues[$fieldName] = $valuesArray;
            }

            $sortArgs[] = &$sortFieldValues[$fieldName];
        }

        if ($count + 1 < $argCount && is_integer($argList[$count+1])) {
            // First explicit flag
            $count++;
            $sortArgs[] = &$argList[$count];

            if ($count + 1 < $argCount && is_integer($argList[$count+1])) {
                // Second explicit flag
                $count++;
                $sortArgs[] = &$argList[$count];
            } else {
                // Only one flag was given: complete it with the default of the other kind
                if ($argList[$count] == SORT_ASC || $argList[$count] == SORT_DESC) {
                    $sortArgs[] = &$sortReg;
                } else {
                    $sortArgs[] = &$sortAsc;
                }
            }
        } else {
            $sortArgs[] = &$sortAsc;
            $sortArgs[] = &$sortReg;
        }
    }

    // Sort by id's if values are equal
    $sortArgs[] = &$ids;
    $sortArgs[] = &$sortAsc;
    $sortArgs[] = &$sortNum;

    // Array to be sorted
    $sortArgs[] = &$hits;

    // Do sort
    call_user_func_array('array_multisort', $sortArgs);

    return $hits;
}
|
1075 |
|
1076 |
|
/**
 * Collects the field names reported by every segment of this index.
 *
 * The per-segment lists are combined with array_merge(), so a name is
 * collapsed to one entry only where segments report it under the same
 * string key.
 *
 * @param boolean $indexed Passed through unchanged to each segment's getFields() call
 * @return array
 */
public function getFieldNames($indexed = false)
{
    $fieldNames = array();

    foreach ($this->_segmentInfos as $segment) {
        $segmentFields = $segment->getFields($indexed);
        $fieldNames    = array_merge($fieldNames, $segmentFields);
    }

    return $fieldNames;
}
|
1091 |
|
1092 |
|
/**
 * Returns a Zend_Search_Lucene_Document object for the document
 * number $id in this index.
 *
 * A query hit may be passed instead of a number; its id property is used.
 * The owning segment is found by walking the segments in order and summing
 * their document counts; the stored fields are then read from that segment's
 * '.fdx' and '.fdt' files.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @return Zend_Search_Lucene_Document
 * @throws Zend_Search_Lucene_Exception Exception is thrown if $id is out of the range
 *                                      (negative, or not below the document count)
 */
public function getDocument($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    // Negative ids are rejected too: they would otherwise select the first
    // segment and seek to a negative offset in its '.fdx' file.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // Locate the segment holding $id. After the loop $segmentStartId is the
    // index-wide id of that segment's first document.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }

    // '.fdx' entries are addressed in 8-byte steps; the long read there is
    // used as the position of the document's data inside '.fdt'.
    $fdxFile = $segmentInfo->openCompoundFile('.fdx');
    $fdxFile->seek(($id - $segmentStartId) * 8, SEEK_CUR);
    $fieldValuesPosition = $fdxFile->readLong();

    $fdtFile = $segmentInfo->openCompoundFile('.fdt');
    $fdtFile->seek($fieldValuesPosition, SEEK_CUR);
    $fieldCount = $fdtFile->readVInt();

    $doc = new Zend_Search_Lucene_Document();
    for ($count = 0; $count < $fieldCount; $count++) {
        $fieldNum = $fdtFile->readVInt();
        $bits     = $fdtFile->readByte();

        $fieldInfo = $segmentInfo->getField($fieldNum);

        // Flag value 2 distinguishes binary from text data. Flag value 1 is
        // handed to the field constructor as-is.
        // NOTE(review): presumably the "tokenized" flag; confirm against
        // Zend_Search_Lucene_Field::__construct().
        if (!($bits & 2)) { // Text data
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $fdtFile->readString(),
                                                  'UTF-8',
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1 );
        } else {            // Binary data
            $field = new Zend_Search_Lucene_Field($fieldInfo->name,
                                                  $fdtFile->readBinary(),
                                                  '',
                                                  true,
                                                  $fieldInfo->isIndexed,
                                                  $bits & 1,
                                                  true );
        }

        $doc->addField($field);
    }

    return $doc;
}
|
1159 |
|
1160 |
|
/**
 * Tells whether at least one segment of the index knows the given term.
 *
 * Is used for query optimization.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return boolean True as soon as any segment returns term info for $term
 */
public function hasTerm(Zend_Search_Lucene_Index_Term $term)
{
    $found = false;

    foreach ($this->_segmentInfos as $segment) {
        if ($segment->getTermInfo($term) !== null) {
            // One matching segment is enough; stop scanning
            $found = true;
            break;
        }
    }

    return $found;
}
|
1179 |
|
/**
 * Returns IDs of all documents containing term.
 *
 * Every segment is queried with the index-wide id of its first document, so
 * the ids it returns are already shifted into the index-wide numbering.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Handed to each segment unchanged
 * @return array
 */
public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $perSegment = array();
    $firstDocId = 0;

    foreach ($this->_segmentInfos as $segment) {
        $perSegment[] = $segment->termDocs($term, $firstDocId, $docsFilter);
        $firstDocId  += $segment->count();
    }

    switch (count($perSegment)) {
        case 0:
            // No segments at all
            return array();

        case 1:
            // Index is optimized (only one segment)
            // Do not perform array reindexing
            return reset($perSegment);

        default:
            return call_user_func_array('array_merge', $perSegment);
    }
}
|
1210 |
|
/**
 * Returns the documents containing the term, restricted by an optional filter.
 *
 * NOTE(review): despite its name, this method has always produced the same
 * value as termDocs(), an array of document ids. It has never returned a
 * Zend_Search_Lucene_Index_DocsFilter object. That result is kept so existing
 * callers are unaffected; building a real filter object is still to be done.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
 * @return array
 */
public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    // The former body was a copy of termDocs() with two defects: the list of
    // per-segment results was never initialised (count() on an undefined
    // variable when the index has no segments), and a DocsFilter object was
    // created only to be overwritten. Delegating keeps the result identical
    // and removes both problems.
    return $this->termDocs($term, $docsFilter);
}
|
1244 |
|
1245 |
|
/**
 * Returns an array of all term freqs.
 * Result array structure: array(docId => freq, ...)
 *
 * Per-segment results are joined with the array union operator, so an entry
 * already present under a given key is never replaced by a later segment.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Handed to each segment unchanged
 * @return array
 */
public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $freqs      = array();
    $firstDocId = 0;

    foreach ($this->_segmentInfos as $segment) {
        $freqs = $freqs + $segment->termFreqs($term, $firstDocId, $docsFilter);

        // Next segment starts right after this segment's documents
        $firstDocId = $firstDocId + $segment->count();
    }

    return $freqs;
}
|
1266 |
|
/**
 * Returns an array of all term positions in the documents.
 * Result array structure: array(docId => array(pos1, pos2, ...), ...)
 *
 * Per-segment results are joined with the array union operator, so an entry
 * already present under a given key is never replaced by a later segment.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter Handed to each segment unchanged
 * @return array
 */
public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
{
    $positions  = array();
    $firstDocId = 0;

    foreach ($this->_segmentInfos as $segment) {
        $positions = $positions + $segment->termPositions($term, $firstDocId, $docsFilter);

        // Next segment starts right after this segment's documents
        $firstDocId = $firstDocId + $segment->count();
    }

    return $positions;
}
|
1287 |
|
1288 |
|
/**
 * Returns the number of documents in this index containing the $term.
 *
 * The figure is the sum of the docFreq values of the term info objects
 * returned by the individual segments.
 *
 * @param Zend_Search_Lucene_Index_Term $term
 * @return integer
 */
public function docFreq(Zend_Search_Lucene_Index_Term $term)
{
    $total = 0;

    foreach ($this->_segmentInfos as $segment) {
        $info = $segment->getTermInfo($term);

        if ($info === null) {
            // Term is unknown to this segment
            continue;
        }

        $total += $info->docFreq;
    }

    return $total;
}
|
1307 |
|
1308 |
|
/**
 * Retrieve similarity used by index reader
 *
 * The value is whatever Zend_Search_Lucene_Search_Similarity::getDefault()
 * returns; the class file is loaded on demand.
 *
 * @return Zend_Search_Lucene_Search_Similarity
 */
public function getSimilarity()
{
    /** Zend_Search_Lucene_Search_Similarity */
    require_once 'Zend/Search/Lucene/Search/Similarity.php';

    $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();

    return $similarity;
}
|
1321 |
|
1322 |
|
/**
 * Returns a normalization factor for "field, document" pair.
 *
 * @param integer $id
 * @param string $fieldName
 * @return float|integer|null null when $id lies outside the index (negative, or
 *                            not below the document count), 0 when the document
 *                            is marked as deleted, otherwise the value reported
 *                            by the owning segment
 */
public function norm($id, $fieldName)
{
    // Negative ids are treated like any other out-of-range id; they would
    // otherwise reach the first segment as a negative local document number.
    if ($id < 0 || $id >= $this->_docCount) {
        return null;
    }

    // Locate the segment holding $id. After the loop $segmentStartId is the
    // index-wide id of that segment's first document.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segInfo) {
        if ($segmentStartId + $segInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segInfo->count();
    }

    $localId = $id - $segmentStartId;

    if ($segInfo->isDeleted($localId)) {
        return 0;
    }

    return $segInfo->norm($localId, $fieldName);
}
|
1351 |
|
/**
 * Returns true if any documents have been deleted from this index.
 *
 * The answer is taken from the segments: the first one that reports
 * deletions ends the scan.
 *
 * @return boolean
 */
public function hasDeletions()
{
    $anyDeleted = false;

    foreach ($this->_segmentInfos as $segment) {
        if ($segment->hasDeletions()) {
            $anyDeleted = true;
            break;
        }
    }

    return $anyDeleted;
}
|
1367 |
|
1368 |
|
/**
 * Deletes a document from the index.
 * $id is an internal document id
 *
 * A query hit may be passed instead of a number; its id property is used.
 * The deletion is forwarded to the segment that owns the document and the
 * index is flagged as changed.
 *
 * @param integer|Zend_Search_Lucene_Search_QueryHit $id
 * @throws Zend_Search_Lucene_Exception If $id is negative or not below the document count
 */
public function delete($id)
{
    if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
        /* @var $id Zend_Search_Lucene_Search_QueryHit */
        $id = $id->id;
    }

    // Negative ids are rejected too: they would otherwise be forwarded to the
    // first segment as a negative local document number.
    if ($id < 0 || $id >= $this->_docCount) {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
    }

    // Locate the segment holding $id. After the loop $segmentStartId is the
    // index-wide id of that segment's first document.
    $segmentStartId = 0;
    foreach ($this->_segmentInfos as $segmentInfo) {
        if ($segmentStartId + $segmentInfo->count() > $id) {
            break;
        }

        $segmentStartId += $segmentInfo->count();
    }
    $segmentInfo->delete($id - $segmentStartId);

    $this->_hasChanges = true;
}
|
1400 |
|
1401 |
|
1402 |
|
/**
 * Adds a document to this index.
 *
 * The document is passed to the index writer; afterwards the document
 * counter is incremented and the index is flagged as changed.
 *
 * @param Zend_Search_Lucene_Document $document
 */
public function addDocument(Zend_Search_Lucene_Document $document)
{
    $writer = $this->_getIndexWriter();
    $writer->addDocument($document);

    // Bookkeeping happens only after the writer accepted the document
    $this->_docCount   = $this->_docCount + 1;
    $this->_hasChanges = true;
}
|
1415 |
|
1416 |
|
/**
 * Update document counter
 *
 * Recomputes the counter as the sum of the document counts of all segments.
 */
private function _updateDocCount()
{
    $total = 0;

    foreach ($this->_segmentInfos as $segment) {
        $total += $segment->count();
    }

    $this->_docCount = $total;
}
|
1427 |
|
/**
 * Commit changes resulting from delete() or undeleteAll() operations.
 *
 * Does nothing unless the index is flagged as changed. Otherwise the index
 * writer commits, the document counter is recomputed and the flag is cleared.
 *
 * @todo undeleteAll processing.
 */
public function commit()
{
    if (!$this->_hasChanges) {
        // Nothing pending
        return;
    }

    $this->_getIndexWriter()->commit();
    $this->_updateDocCount();

    $this->_hasChanges = false;
}
|
1443 |
|
1444 |
|
/**
 * Optimize index.
 *
 * Merges all segments into one
 *
 * Pending changes are committed first. The writer is asked to optimize only
 * when there is more than one segment or some segment reports deletions.
 */
public function optimize()
{
    // Commit changes if any changes have been made
    $this->commit();

    if (count($this->_segmentInfos) <= 1 && !$this->hasDeletions()) {
        // Single (or no) segment without deletions: nothing to merge
        return;
    }

    $this->_getIndexWriter()->optimize();
    $this->_updateDocCount();
}
|
1460 |
|
1461 |
|
/**
 * Returns an array of all terms in this index.
 *
 * The term streams of all segments are merged through a priority queue.
 * A term is emitted only when the segment at the top of the queue is
 * positioned on a different term key (or the queue is empty), so each run of
 * equal keys contributes a single entry.
 *
 * @return array
 */
public function terms()
{
    /** Zend_Search_Lucene_Index_TermsPriorityQueue */
    require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';

    $queue = new Zend_Search_Lucene_Index_TermsPriorityQueue();

    foreach ($this->_segmentInfos as $segment) {
        $segment->resetTermsStream();

        // Skip "empty" segments
        if ($segment->currentTerm() === null) {
            continue;
        }

        $queue->put($segment);
    }

    $collected = array();

    for ($segment = $queue->pop(); $segment !== null; $segment = $queue->pop()) {
        $following = $queue->top();

        $isNewTerm = ($following === null)
                  || ($following->currentTerm()->key() != $segment->currentTerm()->key());

        if ($isNewTerm) {
            // We got new term
            $collected[] = $segment->currentTerm();
        }

        if ($segment->nextTerm() !== null) {
            // Put segment back into the priority queue
            $queue->put($segment);
        }
    }

    return $collected;
}
|
1501 |
|
1502 |
|
/**
 * Terms stream priority queue object.
 *
 * Null until resetTermsStream() creates the queue; closeTermsStream() sets it
 * back to null.
 *
 * @var Zend_Search_Lucene_TermStreamsPriorityQueue|null
 */
private $_termsStream;
|
1509 |
|
/**
 * Reset terms stream.
 *
 * An existing stream object is reset in place; otherwise a new priority
 * queue is created over the segments of this index.
 */
public function resetTermsStream()
{
    if ($this->_termsStream !== null) {
        $this->_termsStream->resetTermsStream();
        return;
    }

    /** Zend_Search_Lucene_TermStreamsPriorityQueue */
    require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';

    $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_segmentInfos);
}
|
1524 |
|
1525 /** |
|
1526 * Skip terms stream up to the specified term preffix. |
|
1527 * |
|
1528 * Prefix contains fully specified field info and portion of searched term |
|
1529 * |
|
1530 * @param Zend_Search_Lucene_Index_Term $prefix |
|
1531 */ |
|
1532 public function skipTo(Zend_Search_Lucene_Index_Term $prefix) |
|
1533 { |
|
1534 $this->_termsStream->skipTo($prefix); |
|
1535 } |
|
1536 |
|
/**
 * Scans terms dictionary and returns next term
 *
 * The call is forwarded to the terms stream object, which must already exist.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function nextTerm()
{
    $stream = $this->_termsStream;

    return $stream->nextTerm();
}
|
1546 |
|
/**
 * Returns term in current position
 *
 * The call is forwarded to the terms stream object, which must already exist.
 *
 * @return Zend_Search_Lucene_Index_Term|null
 */
public function currentTerm()
{
    $stream = $this->_termsStream;

    return $stream->currentTerm();
}
|
1556 |
|
/**
 * Close terms stream
 *
 * Should be used for resources clean up if stream is not read up to the end
 *
 * Safe to call when no stream is open (never reset, or already closed): in
 * that case the method does nothing.
 */
public function closeTermsStream()
{
    // Without this guard a repeated or premature call would invoke a method
    // on null and abort the script.
    if ($this->_termsStream === null) {
        return;
    }

    $this->_termsStream->closeTermsStream();
    $this->_termsStream = null;
}
|
1567 |
|
1568 |
|
1569 /************************************************************************* |
|
1570 @todo UNIMPLEMENTED |
|
1571 *************************************************************************/ |
|
/**
 * Undeletes all documents currently marked as deleted in this index.
 *
 * Currently a stub: calling it has no effect.
 *
 * @todo Implementation
 */
public function undeleteAll()
{
    // Intentionally empty until the operation is implemented
}
|
1579 } |