|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
18 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
19 * @version $Id: MultiSearcher.php 22967 2010-09-18 18:53:58Z ramon $ |
|
20 */ |
|
21 |
|
22 |
|
23 /** Zend_Search_Lucene_Interface */ |
|
24 require_once 'Zend/Search/Lucene/Interface.php'; |
|
25 |
|
/**
 * Multisearcher allows to search through several independent indexes.
 *
 * The sub-indices are treated as if they were concatenated in the order in
 * which they were supplied: the "global" id of a document is its id inside its
 * own sub-index plus the sum of count() of all preceding sub-indices. Every
 * method taking or returning document ids works with these global ids.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Interface_MultiSearcher implements Zend_Search_Lucene_Interface
{
    /**
     * List of indices for searching.
     * Array of Zend_Search_Lucene_Interface objects
     *
     * @var array
     */
    protected $_indices;

    /**
     * Object constructor.
     *
     * @param array $indices   Arrays of indices for search
     * @throws Zend_Search_Lucene_Exception  If an element does not implement Zend_Search_Lucene_Interface
     */
    public function __construct($indices = array())
    {
        // Validate before storing so that an invalid list is never kept.
        foreach ($indices as $index) {
            if (!$index instanceof Zend_Search_Lucene_Interface) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('sub-index objects have to implement Zend_Search_Lucene_Interface.');
            }
        }

        $this->_indices = $indices;
    }

    /**
     * Add index for searching.
     *
     * @param Zend_Search_Lucene_Interface $index
     */
    public function addIndex(Zend_Search_Lucene_Interface $index)
    {
        $this->_indices[] = $index;
    }

    /**
     * Translate a global document id into a (sub-index, local id) pair.
     *
     * @param integer $id   Global document id
     * @return array|null   array($index, $localId), or null if $id lies beyond the last sub-index
     */
    protected function _resolveDocumentId($id)
    {
        foreach ($this->_indices as $index) {
            $indexCount = $index->count();

            if ($indexCount > $id) {
                return array($index, $id);
            }

            // Not in this sub-index: make the id relative to the next one.
            $id -= $indexCount;
        }

        return null;
    }

    /**
     * Read an option through $getter from every sub-index and return it if all
     * sub-indices agree (strict comparison).
     *
     * @param string $getter           Name of the getter method to call on each sub-index
     * @param string $mismatchMessage  Exception message used when sub-indices disagree
     * @return mixed
     * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or the values differ
     */
    protected function _getCommonOption($getter, $mismatchMessage)
    {
        if (count($this->_indices) == 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Indices list is empty');
        }

        $value = reset($this->_indices)->$getter();

        foreach ($this->_indices as $index) {
            if ($index->$getter() !== $value) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception($mismatchMessage);
            }
        }

        return $value;
    }


    /**
     * Get current generation number
     *
     * Not supported by the multi-searcher: always throws.
     *
     * @param Zend_Search_Lucene_Storage_Directory $directory
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public static function getActualGeneration(Zend_Search_Lucene_Storage_Directory $directory)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception("Generation number can't be retrieved for multi-searcher");
    }

    /**
     * Get segments file name
     *
     * @param integer $generation
     * @return string
     */
    public static function getSegmentFileName($generation)
    {
        require_once 'Zend/Search/Lucene.php';

        return Zend_Search_Lucene::getSegmentFileName($generation);
    }

    /**
     * Get index format version
     *
     * Not supported by the multi-searcher: always throws.
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function getFormatVersion()
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception("Format version can't be retrieved for multi-searcher");
    }

    /**
     * Set index format version.
     * Index is converted to this format at the nearest update time
     *
     * The value is forwarded to every sub-index.
     *
     * @param int $formatVersion
     */
    public function setFormatVersion($formatVersion)
    {
        foreach ($this->_indices as $index) {
            $index->setFormatVersion($formatVersion);
        }
    }

    /**
     * Returns the Zend_Search_Lucene_Storage_Directory instance for this index.
     *
     * Not supported by the multi-searcher: always throws.
     *
     * @return Zend_Search_Lucene_Storage_Directory
     * @throws Zend_Search_Lucene_Exception
     */
    public function getDirectory()
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception("Index directory can't be retrieved for multi-searcher");
    }

    /**
     * Returns the total number of documents in this index (including deleted documents).
     *
     * This is the sum of count() over all sub-indices.
     *
     * @return integer
     */
    public function count()
    {
        $count = 0;

        foreach ($this->_indices as $index) {
            $count += $index->count();
        }

        return $count;
    }

    /**
     * Returns one greater than the largest possible document number.
     * This may be used to, e.g., determine how big to allocate a structure which will have
     * an element for every document number in an index.
     *
     * @return integer
     */
    public function maxDoc()
    {
        return $this->count();
    }

    /**
     * Returns the total number of non-deleted documents in this index.
     *
     * This is the sum of numDocs() over all sub-indices.
     *
     * @return integer
     */
    public function numDocs()
    {
        $docs = 0;

        foreach ($this->_indices as $index) {
            $docs += $index->numDocs();
        }

        return $docs;
    }

    /**
     * Checks, that document is deleted
     *
     * @param integer $id   Global document id
     * @return boolean
     * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
     */
    public function isDeleted($id)
    {
        $location = $this->_resolveDocumentId($id);

        if ($location === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        list($index, $localId) = $location;

        return $index->isDeleted($localId);
    }

    /**
     * Set default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * Default value is null
     *
     * This method is static, so there is no instance (and therefore no list of
     * sub-indices) to iterate over. The call is delegated to the static
     * Zend_Search_Lucene setting, in the same way as getSegmentFileName().
     * NOTE(review): sub-index implementations that keep their own copy of this
     * setting are not reached from here - confirm none are in use.
     *
     * @param string $fieldName
     */
    public static function setDefaultSearchField($fieldName)
    {
        require_once 'Zend/Search/Lucene.php';

        Zend_Search_Lucene::setDefaultSearchField($fieldName);
    }


    /**
     * Get default search field.
     *
     * Null means, that search is performed through all fields by default
     *
     * Static for the same reason as setDefaultSearchField(); the value is read
     * from the static Zend_Search_Lucene setting.
     *
     * @return string
     */
    public static function getDefaultSearchField()
    {
        require_once 'Zend/Search/Lucene.php';

        return Zend_Search_Lucene::getDefaultSearchField();
    }

    /**
     * Set result set limit.
     *
     * 0 (default) means no limit
     *
     * Static, hence delegated to the static Zend_Search_Lucene setting
     * (no sub-index list is available without an instance).
     *
     * @param integer $limit
     */
    public static function setResultSetLimit($limit)
    {
        require_once 'Zend/Search/Lucene.php';

        Zend_Search_Lucene::setResultSetLimit($limit);
    }

    /**
     * Get result set limit.
     *
     * 0 means no limit
     *
     * Static, hence read from the static Zend_Search_Lucene setting.
     *
     * @return integer
     */
    public static function getResultSetLimit()
    {
        require_once 'Zend/Search/Lucene.php';

        return Zend_Search_Lucene::getResultSetLimit();
    }

    /**
     * Retrieve index maxBufferedDocs option
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or they disagree
     */
    public function getMaxBufferedDocs()
    {
        return $this->_getCommonOption(
            'getMaxBufferedDocs',
            'Indices have different maxBufferedDocs option.'
        );
    }

    /**
     * Set index maxBufferedDocs option
     *
     * maxBufferedDocs is a minimal number of documents required before
     * the buffered in-memory documents are written into a new Segment
     *
     * Default value is 10
     *
     * @param integer $maxBufferedDocs
     */
    public function setMaxBufferedDocs($maxBufferedDocs)
    {
        foreach ($this->_indices as $index) {
            $index->setMaxBufferedDocs($maxBufferedDocs);
        }
    }

    /**
     * Retrieve index maxMergeDocs option
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or they disagree
     */
    public function getMaxMergeDocs()
    {
        return $this->_getCommonOption(
            'getMaxMergeDocs',
            'Indices have different maxMergeDocs option.'
        );
    }

    /**
     * Set index maxMergeDocs option
     *
     * maxMergeDocs is a largest number of documents ever merged by addDocument().
     * Small values (e.g., less than 10,000) are best for interactive indexing,
     * as this limits the length of pauses while indexing to a few seconds.
     * Larger values are best for batched indexing and speedier searches.
     *
     * Default value is PHP_INT_MAX
     *
     * @param integer $maxMergeDocs
     */
    public function setMaxMergeDocs($maxMergeDocs)
    {
        foreach ($this->_indices as $index) {
            $index->setMaxMergeDocs($maxMergeDocs);
        }
    }

    /**
     * Retrieve index mergeFactor option
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or they disagree
     */
    public function getMergeFactor()
    {
        return $this->_getCommonOption(
            'getMergeFactor',
            'Indices have different mergeFactor option.'
        );
    }

    /**
     * Set index mergeFactor option
     *
     * mergeFactor determines how often segment indices are merged by addDocument().
     * With smaller values, less RAM is used while indexing,
     * and searches on unoptimized indices are faster,
     * but indexing speed is slower.
     * With larger values, more RAM is used during indexing,
     * and while searches on unoptimized indices are slower,
     * indexing is faster.
     * Thus larger values (> 10) are best for batch index creation,
     * and smaller values (< 10) for indices that are interactively maintained.
     *
     * Default value is 10
     *
     * @param integer $mergeFactor
     */
    public function setMergeFactor($mergeFactor)
    {
        foreach ($this->_indices as $index) {
            $index->setMergeFactor($mergeFactor);
        }
    }

    /**
     * Performs a query against the index and returns an array
     * of Zend_Search_Lucene_Search_QueryHit objects.
     * Input is a string or Zend_Search_Lucene_Search_Query.
     *
     * Hits are returned grouped by sub-index, in sub-index order; no global
     * re-sorting is done. The id of each hit is rewritten to the global id.
     * NOTE(review): the hit objects themselves are created by the sub-indices;
     * confirm that code resolving a hit to its document does so through this
     * multi-searcher, otherwise the shifted id would be misinterpreted.
     *
     * @param mixed $query
     * @return array Zend_Search_Lucene_Search_QueryHit
     * @throws Zend_Search_Lucene_Exception
     */
    public function find($query)
    {
        $hitsList = array();

        $indexShift = 0;
        foreach ($this->_indices as $index) {
            foreach ($index->find($query) as $hit) {
                if ($indexShift != 0) {
                    $hit->id += $indexShift;
                }

                $hitsList[] = $hit;
            }

            $indexShift += $index->count();
        }

        /** @todo Implement advanced sorting */

        return $hitsList;
    }

    /**
     * Returns a list of all unique field names that exist in this index.
     *
     * @param boolean $indexed
     * @return array
     */
    public function getFieldNames($indexed = false)
    {
        $fieldNamesList = array();

        foreach ($this->_indices as $index) {
            $fieldNamesList[] = $index->getFieldNames($indexed);
        }

        // array_merge() must not be invoked without arguments.
        if (count($fieldNamesList) == 0) {
            return array();
        }

        return array_unique(call_user_func_array('array_merge', $fieldNamesList));
    }

    /**
     * Returns a Zend_Search_Lucene_Document object for the document
     * number $id in this index.
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id   Global document id, or a hit carrying it
     * @return Zend_Search_Lucene_Document
     * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
     */
    public function getDocument($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        $location = $this->_resolveDocumentId($id);

        if ($location === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        list($index, $localId) = $location;

        return $index->getDocument($localId);
    }

    /**
     * Returns true if index contain documents with specified term.
     *
     * Is used for query optimization.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return boolean
     */
    public function hasTerm(Zend_Search_Lucene_Index_Term $term)
    {
        foreach ($this->_indices as $index) {
            if ($index->hasTerm($term)) {
                return true;
            }
        }

        return false;
    }

    /**
     * Returns IDs of all the documents containing term.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter   Not supported; must be null
     * @return array   List of global document ids
     * @throws Zend_Search_Lucene_Exception  If a documents filter is supplied
     */
    public function termDocs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
    {
        if ($docsFilter != null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
        }

        $docsList = array();

        $indexShift = 0;
        foreach ($this->_indices as $index) {
            foreach ($index->termDocs($term) as $docId) {
                $docsList[] = $docId + $indexShift;
            }

            $indexShift += $index->count();
        }

        return $docsList;
    }

    /**
     * Returns documents filter for all documents containing term.
     *
     * It performs the same operation as termDocs, but return result as
     * Zend_Search_Lucene_Index_DocsFilter object
     *
     * Not supported by the multi-searcher: always throws.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
     * @return Zend_Search_Lucene_Index_DocsFilter
     * @throws Zend_Search_Lucene_Exception
     */
    public function termDocsFilter(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
    {
        require_once 'Zend/Search/Lucene/Exception.php';
        throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
    }

    /**
     * Returns an array of all term freqs.
     * Return array structure: array( docId => freq, ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter   Not supported; must be null
     * @return array   Frequencies keyed by global document id
     * @throws Zend_Search_Lucene_Exception  If a documents filter is supplied
     */
    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
    {
        if ($docsFilter != null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
        }

        $freqsList = array();

        $indexShift = 0;
        foreach ($this->_indices as $index) {
            // Keys are document ids, so they are written explicitly:
            // array_merge() would renumber integer keys from zero.
            foreach ($index->termFreqs($term) as $docId => $freq) {
                $freqsList[$docId + $indexShift] = $freq;
            }

            $indexShift += $index->count();
        }

        return $freqsList;
    }

    /**
     * Returns an array of all term positions in the documents.
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter   Not supported; must be null
     * @return array   Position lists keyed by global document id
     * @throws Zend_Search_Lucene_Exception  If a documents filter is supplied
     */
    public function termPositions(Zend_Search_Lucene_Index_Term $term, $docsFilter = null)
    {
        if ($docsFilter != null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document filters could not used with multi-searcher');
        }

        $termPositionsList = array();

        $indexShift = 0;
        foreach ($this->_indices as $index) {
            // Keys are document ids, so they are written explicitly:
            // array_merge() would renumber integer keys from zero.
            foreach ($index->termPositions($term) as $docId => $positions) {
                $termPositionsList[$docId + $indexShift] = $positions;
            }

            $indexShift += $index->count();
        }

        return $termPositionsList;
    }

    /**
     * Returns the number of documents in this index containing the $term.
     *
     * This is the sum of docFreq() over all sub-indices.
     *
     * @param Zend_Search_Lucene_Index_Term $term
     * @return integer
     */
    public function docFreq(Zend_Search_Lucene_Index_Term $term)
    {
        $docFreq = 0;

        foreach ($this->_indices as $index) {
            $docFreq += $index->docFreq($term);
        }

        return $docFreq;
    }

    /**
     * Retrieve similarity used by index reader
     *
     * All sub-indices must return the very same similarity object.
     *
     * @return Zend_Search_Lucene_Search_Similarity
     * @throws Zend_Search_Lucene_Exception  If there are no sub-indices or they disagree
     */
    public function getSimilarity()
    {
        return $this->_getCommonOption(
            'getSimilarity',
            'Indices have different similarity.'
        );
    }

    /**
     * Returns a normalization factor for "field, document" pair.
     *
     * @param integer $id   Global document id
     * @param string $fieldName
     * @return float|null   null if $id is out of the range
     */
    public function norm($id, $fieldName)
    {
        $location = $this->_resolveDocumentId($id);

        if ($location === null) {
            return null;
        }

        list($index, $localId) = $location;

        return $index->norm($localId, $fieldName);
    }

    /**
     * Returns true if any documents have been deleted from this index.
     *
     * @return boolean
     */
    public function hasDeletions()
    {
        foreach ($this->_indices as $index) {
            if ($index->hasDeletions()) {
                return true;
            }
        }

        return false;
    }

    /**
     * Deletes a document from the index.
     * $id is an internal document id
     *
     * @param integer|Zend_Search_Lucene_Search_QueryHit $id   Global document id, or a hit carrying it
     * @throws Zend_Search_Lucene_Exception  Exception is thrown if $id is out of the range
     */
    public function delete($id)
    {
        if ($id instanceof Zend_Search_Lucene_Search_QueryHit) {
            /* @var $id Zend_Search_Lucene_Search_QueryHit */
            $id = $id->id;
        }

        $location = $this->_resolveDocumentId($id);

        if ($location === null) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Document id is out of the range.');
        }

        list($index, $localId) = $location;

        $index->delete($localId);
    }


    /**
     * Callback used to choose target index for new documents
     *
     * Function/method signature:
     *    Zend_Search_Lucene_Interface  callbackFunction(Zend_Search_Lucene_Document $document, array $indices);
     *
     * null means "default documents distributing algorithm"
     *
     * @var callback
     */
    protected $_documentDistributorCallBack = null;

    /**
     * Set callback for choosing target index.
     *
     * @param callback $callback   Valid callback, or null to restore the default algorithm
     * @throws Zend_Search_Lucene_Exception  If $callback is neither null nor callable
     */
    public function setDocumentDistributorCallback($callback)
    {
        if ($callback !== null  &&  !is_callable($callback)) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('$callback parameter must be a valid callback.');
        }

        $this->_documentDistributorCallBack = $callback;
    }

    /**
     * Get callback for choosing target index.
     *
     * @return callback
     */
    public function getDocumentDistributorCallback()
    {
        return $this->_documentDistributorCallBack;
    }

    /**
     * Adds a document to this index.
     *
     * The target sub-index is chosen by the document distributor callback if
     * one is set, otherwise at random.
     *
     * @param Zend_Search_Lucene_Document $document
     * @throws Zend_Search_Lucene_Exception  If no target index can be determined
     */
    public function addDocument(Zend_Search_Lucene_Document $document)
    {
        if ($this->_documentDistributorCallBack !== null) {
            $index = call_user_func($this->_documentDistributorCallBack, $document, $this->_indices);

            if (!$index instanceof Zend_Search_Lucene_Interface) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Document distributor callback has to return Zend_Search_Lucene_Interface object.');
            }
        } else {
            // array_rand() cannot pick from an empty array.
            if (count($this->_indices) == 0) {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Indices list is empty');
            }

            $index = $this->_indices[array_rand($this->_indices)];
        }

        $index->addDocument($document);
    }

    /**
     * Commit changes resulting from delete() or undeleteAll() operations.
     */
    public function commit()
    {
        foreach ($this->_indices as $index) {
            $index->commit();
        }
    }

    /**
     * Optimize index.
     *
     * Merges all segments into one (separately for each sub-index)
     */
    public function optimize()
    {
        foreach ($this->_indices as $index) {
            $index->optimize();
        }
    }

    /**
     * Returns an array of all terms in this index.
     *
     * Terms present in several sub-indices are returned once, at the position
     * of their first occurrence.
     *
     * @return array
     */
    public function terms()
    {
        $termsList = array();

        foreach ($this->_indices as $index) {
            foreach ($index->terms() as $term) {
                // De-duplicate by term key; array_unique() compares string
                // casts and is therefore not suitable for term objects.
                $key = $term->key();

                if (!isset($termsList[$key])) {
                    $termsList[$key] = $term;
                }
            }
        }

        return array_values($termsList);
    }


    /**
     * Terms stream priority queue object
     *
     * null while no terms stream is open.
     *
     * @var Zend_Search_Lucene_TermStreamsPriorityQueue
     */
    private $_termsStream = null;

    /**
     * Reset terms stream.
     *
     * Opens the stream on first use; afterwards rewinds the existing one.
     */
    public function resetTermsStream()
    {
        if ($this->_termsStream === null) {
            /** Zend_Search_Lucene_TermStreamsPriorityQueue */
            require_once 'Zend/Search/Lucene/TermStreamsPriorityQueue.php';

            $this->_termsStream = new Zend_Search_Lucene_TermStreamsPriorityQueue($this->_indices);
        } else {
            $this->_termsStream->resetTermsStream();
        }
    }

    /**
     * Skip terms stream up to specified term prefix.
     *
     * Prefix contains fully specified field info and portion of searched term
     *
     * The stream has to be opened with resetTermsStream() first.
     *
     * @param Zend_Search_Lucene_Index_Term $prefix
     */
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
    {
        $this->_termsStream->skipTo($prefix);
    }

    /**
     * Scans terms dictionary and returns next term
     *
     * The stream has to be opened with resetTermsStream() first.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function nextTerm()
    {
        return $this->_termsStream->nextTerm();
    }

    /**
     * Returns term in current position
     *
     * The stream has to be opened with resetTermsStream() first.
     *
     * @return Zend_Search_Lucene_Index_Term|null
     */
    public function currentTerm()
    {
        return $this->_termsStream->currentTerm();
    }

    /**
     * Close terms stream
     *
     * Should be used for resources clean up if stream is not read up to the end.
     * Does nothing if no stream is open.
     */
    public function closeTermsStream()
    {
        if ($this->_termsStream === null) {
            return;
        }

        $this->_termsStream->closeTermsStream();
        $this->_termsStream = null;
    }


    /**
     * Undeletes all documents currently marked as deleted in this index.
     */
    public function undeleteAll()
    {
        foreach ($this->_indices as $index) {
            $index->undeleteAll();
        }
    }


    /**
     * Add reference to the index object
     *
     * @internal
     */
    public function addReference()
    {
        // Do nothing, since it's never referenced by indices
    }

    /**
     * Remove reference from the index object
     *
     * When reference count becomes zero, index is closed and resources are cleaned up
     *
     * @internal
     */
    public function removeReference()
    {
        // Do nothing, since it's never referenced by indices
    }
}