| author | Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> |
| Mon, 19 Dec 2011 00:56:02 +0100 | |
| changeset 428 | 8b51d23b758f |
| parent 68 | ecaf28ffe26e |
| child 207 | 621fa6caec0c |
| permissions | -rw-r--r-- |
|
0
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
1 |
<?php |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
2 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
3 |
* Zend Framework |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
4 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
5 |
* LICENSE |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
6 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
7 |
* This source file is subject to the new BSD license that is bundled |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
8 |
* with this package in the file LICENSE.txt. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
9 |
* It is also available through the world-wide-web at this URL: |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
10 |
* http://framework.zend.com/license/new-bsd |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
11 |
* If you did not receive a copy of the license and are unable to |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
12 |
* obtain it through the world-wide-web, please send an email |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
13 |
* to license@zend.com so we can send you a copy immediately. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
14 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
15 |
* @category Zend |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
16 |
* @package Zend_Search_Lucene |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
17 |
* @subpackage Index |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
18 |
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
19 |
* @license http://framework.zend.com/license/new-bsd New BSD License |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
20 |
* @version $Id: SegmentWriter.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
21 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
22 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
23 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
24 |
/** Zend_Search_Lucene_Index_FieldInfo */ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
25 |
require_once 'Zend/Search/Lucene/Index/FieldInfo.php'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
26 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
27 |
/** Zend_Search_Lucene_Index_Term */ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
28 |
require_once 'Zend/Search/Lucene/Index/Term.php'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
29 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
30 |
/** Zend_Search_Lucene_Index_TermInfo */ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
31 |
require_once 'Zend/Search/Lucene/Index/TermInfo.php'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
32 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
33 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
34 |
* @category Zend |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
35 |
* @package Zend_Search_Lucene |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
36 |
* @subpackage Index |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
37 |
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
38 |
* @license http://framework.zend.com/license/new-bsd New BSD License |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
39 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
40 |
abstract class Zend_Search_Lucene_Index_SegmentWriter |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
41 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
42 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
43 |
* Expert: The fraction of terms in the "dictionary" which should be stored |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
44 |
* in RAM. Smaller values use more memory, but make searching slightly |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
45 |
* faster, while larger values use less memory and make searching slightly |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
46 |
* slower. Searching is typically not dominated by dictionary lookup, so |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
47 |
* tweaking this is rarely useful. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
48 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
49 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
50 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
51 |
public static $indexInterval = 128; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
52 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
53 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
54 |
* Expert: The fraction of TermDocs entries stored in skip tables. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
55 |
* Larger values result in smaller indexes, greater acceleration, but fewer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
56 |
* accelerable cases, while smaller values result in bigger indexes, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
57 |
* less acceleration and more |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
58 |
* accelerable cases. More detailed experiments would be useful here. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
59 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
60 |
* 0x7FFFFFFF indicates that we don't use skip data |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
61 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
62 |
* Note: not used in current implementation |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
63 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
64 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
65 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
66 |
public static $skipInterval = 0x7FFFFFFF; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
67 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
68 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
69 |
* Expert: The maximum number of skip levels. Smaller values result in |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
70 |
* slightly smaller indexes, but slower skipping in big posting lists. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
71 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
72 |
* 0 indicates that we don't use skip data |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
73 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
74 |
* Note: not used in current implementation |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
75 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
76 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
77 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
78 |
public static $maxSkipLevels = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
79 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
80 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
81 |
* Number of docs in a segment |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
82 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
83 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
84 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
85 |
protected $_docCount = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
86 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
87 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
88 |
* Segment name |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
89 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
90 |
* @var string |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
91 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
92 |
protected $_name; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
93 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
94 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
95 |
* File system adapter. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
96 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
97 |
* @var Zend_Search_Lucene_Storage_Directory |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
98 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
99 |
protected $_directory; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
100 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
101 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
102 |
* List of the index files. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
103 |
* Used for automatic compound file generation |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
104 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
105 |
* @var unknown_type |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
106 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
107 |
protected $_files = array(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
108 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
109 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
110 |
* Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
111 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
112 |
* @var array |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
113 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
114 |
protected $_fields = array(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
115 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
116 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
117 |
* Normalization factors. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
118 |
* An array fieldName => normVector |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
119 |
* normVector is a binary string. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
120 |
* Each byte corresponds to an indexed document in a segment and |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
121 |
* encodes normalization factor (float value, encoded by |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
122 |
* Zend_Search_Lucene_Search_Similarity::encodeNorm()) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
123 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
124 |
* @var array |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
125 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
126 |
protected $_norms = array(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
127 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
128 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
129 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
130 |
* '.fdx' file - Stored Fields, the field index. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
131 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
132 |
* @var Zend_Search_Lucene_Storage_File |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
133 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
134 |
protected $_fdxFile = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
135 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
136 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
137 |
* '.fdt' file - Stored Fields, the field data. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
138 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
139 |
* @var Zend_Search_Lucene_Storage_File |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
140 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
141 |
protected $_fdtFile = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
142 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
143 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
144 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
145 |
* Object constructor. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
146 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
147 |
* @param Zend_Search_Lucene_Storage_Directory $directory |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
148 |
* @param string $name |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
149 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
150 |
public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
151 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
152 |
$this->_directory = $directory; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
153 |
$this->_name = $name; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
154 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
155 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
156 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
157 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
158 |
* Add field to the segment |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
159 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
160 |
* Returns actual field number |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
161 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
162 |
* @param Zend_Search_Lucene_Field $field |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
163 |
* @return integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
164 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
165 |
public function addField(Zend_Search_Lucene_Field $field) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
166 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
167 |
if (!isset($this->_fields[$field->name])) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
168 |
$fieldNumber = count($this->_fields); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
169 |
$this->_fields[$field->name] = |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
170 |
new Zend_Search_Lucene_Index_FieldInfo($field->name, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
171 |
$field->isIndexed, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
172 |
$fieldNumber, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
173 |
$field->storeTermVector); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
174 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
175 |
return $fieldNumber; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
176 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
177 |
$this->_fields[$field->name]->isIndexed |= $field->isIndexed; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
178 |
$this->_fields[$field->name]->storeTermVector |= $field->storeTermVector; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
179 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
180 |
return $this->_fields[$field->name]->number; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
181 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
182 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
183 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
184 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
185 |
* Add fieldInfo to the segment |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
186 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
187 |
* Returns actual field number |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
188 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
189 |
* @param Zend_Search_Lucene_Index_FieldInfo $fieldInfo |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
190 |
* @return integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
191 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
192 |
public function addFieldInfo(Zend_Search_Lucene_Index_FieldInfo $fieldInfo) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
193 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
194 |
if (!isset($this->_fields[$fieldInfo->name])) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
195 |
$fieldNumber = count($this->_fields); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
196 |
$this->_fields[$fieldInfo->name] = |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
197 |
new Zend_Search_Lucene_Index_FieldInfo($fieldInfo->name, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
198 |
$fieldInfo->isIndexed, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
199 |
$fieldNumber, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
200 |
$fieldInfo->storeTermVector); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
201 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
202 |
return $fieldNumber; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
203 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
204 |
$this->_fields[$fieldInfo->name]->isIndexed |= $fieldInfo->isIndexed; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
205 |
$this->_fields[$fieldInfo->name]->storeTermVector |= $fieldInfo->storeTermVector; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
206 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
207 |
return $this->_fields[$fieldInfo->name]->number; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
208 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
209 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
210 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
211 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
212 |
* Returns array of FieldInfo objects. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
213 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
214 |
* @return array |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
215 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
216 |
public function getFieldInfos() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
217 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
218 |
return $this->_fields; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
219 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
220 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
221 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
222 |
* Add stored fields information |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
223 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
224 |
* @param array $storedFields array of Zend_Search_Lucene_Field objects |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
225 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
226 |
public function addStoredFields($storedFields) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
227 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
228 |
if (!isset($this->_fdxFile)) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
229 |
$this->_fdxFile = $this->_directory->createFile($this->_name . '.fdx'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
230 |
$this->_fdtFile = $this->_directory->createFile($this->_name . '.fdt'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
231 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
232 |
$this->_files[] = $this->_name . '.fdx'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
233 |
$this->_files[] = $this->_name . '.fdt'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
234 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
235 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
236 |
$this->_fdxFile->writeLong($this->_fdtFile->tell()); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
237 |
$this->_fdtFile->writeVInt(count($storedFields)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
238 |
foreach ($storedFields as $field) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
239 |
$this->_fdtFile->writeVInt($this->_fields[$field->name]->number); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
240 |
$fieldBits = ($field->isTokenized ? 0x01 : 0x00) | |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
241 |
($field->isBinary ? 0x02 : 0x00) | |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
242 |
0x00; /* 0x04 - third bit, compressed (ZLIB) */ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
243 |
$this->_fdtFile->writeByte($fieldBits); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
244 |
if ($field->isBinary) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
245 |
$this->_fdtFile->writeVInt(strlen($field->value)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
246 |
$this->_fdtFile->writeBytes($field->value); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
247 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
248 |
$this->_fdtFile->writeString($field->getUtf8Value()); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
249 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
250 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
251 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
252 |
$this->_docCount++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
253 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
254 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
255 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
256 |
* Returns the total number of documents in this segment. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
257 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
258 |
* @return integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
259 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
260 |
public function count() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
261 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
262 |
return $this->_docCount; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
263 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
264 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
265 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
266 |
* Return segment name |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
267 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
268 |
* @return string |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
269 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
270 |
public function getName() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
271 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
272 |
return $this->_name; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
273 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
274 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
275 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
276 |
* Dump Field Info (.fnm) segment file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
277 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
278 |
protected function _dumpFNM() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
279 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
280 |
$fnmFile = $this->_directory->createFile($this->_name . '.fnm'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
281 |
$fnmFile->writeVInt(count($this->_fields)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
282 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
283 |
$nrmFile = $this->_directory->createFile($this->_name . '.nrm'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
284 |
// Write header |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
285 |
$nrmFile->writeBytes('NRM'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
286 |
// Write format specifier |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
287 |
$nrmFile->writeByte((int)0xFF); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
288 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
289 |
foreach ($this->_fields as $field) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
290 |
$fnmFile->writeString($field->name); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
291 |
$fnmFile->writeByte(($field->isIndexed ? 0x01 : 0x00) | |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
292 |
($field->storeTermVector ? 0x02 : 0x00) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
293 |
// not supported yet 0x04 /* term positions are stored with the term vectors */ | |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
294 |
// not supported yet 0x08 /* term offsets are stored with the term vectors */ | |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
295 |
); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
296 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
297 |
if ($field->isIndexed) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
298 |
// pre-2.1 index mode (not used now) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
299 |
// $normFileName = $this->_name . '.f' . $field->number; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
300 |
// $fFile = $this->_directory->createFile($normFileName); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
301 |
// $fFile->writeBytes($this->_norms[$field->name]); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
302 |
// $this->_files[] = $normFileName; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
303 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
304 |
$nrmFile->writeBytes($this->_norms[$field->name]); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
305 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
306 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
307 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
308 |
$this->_files[] = $this->_name . '.fnm'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
309 |
$this->_files[] = $this->_name . '.nrm'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
310 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
311 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
312 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
313 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
314 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
315 |
* Term Dictionary file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
316 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
317 |
* @var Zend_Search_Lucene_Storage_File |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
318 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
319 |
private $_tisFile = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
320 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
321 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
322 |
* Term Dictionary index file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
323 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
324 |
* @var Zend_Search_Lucene_Storage_File |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
325 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
326 |
private $_tiiFile = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
327 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
328 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
329 |
* Frequencies file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
330 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
331 |
* @var Zend_Search_Lucene_Storage_File |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
332 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
333 |
private $_frqFile = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
334 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
335 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
336 |
* Positions file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
337 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
338 |
* @var Zend_Search_Lucene_Storage_File |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
339 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
340 |
private $_prxFile = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
341 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
342 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
343 |
* Number of written terms |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
344 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
345 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
346 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
347 |
private $_termCount; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
348 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
349 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
350 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
351 |
* Last saved term |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
352 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
353 |
* @var Zend_Search_Lucene_Index_Term |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
354 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
355 |
private $_prevTerm; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
356 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
357 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
358 |
* Last saved term info |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
359 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
360 |
* @var Zend_Search_Lucene_Index_TermInfo |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
361 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
362 |
private $_prevTermInfo; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
363 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
364 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
365 |
* Last saved index term |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
366 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
367 |
* @var Zend_Search_Lucene_Index_Term |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
368 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
369 |
private $_prevIndexTerm; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
370 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
371 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
372 |
* Last saved index term info |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
373 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
374 |
* @var Zend_Search_Lucene_Index_TermInfo |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
375 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
376 |
private $_prevIndexTermInfo; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
377 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
378 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
379 |
* Last term dictionary file position |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
380 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
381 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
382 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
383 |
private $_lastIndexPosition; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
384 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
385 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
386 |
* Create dicrionary, frequency and positions files and write necessary headers |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
387 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
388 |
public function initializeDictionaryFiles() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
389 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
390 |
$this->_tisFile = $this->_directory->createFile($this->_name . '.tis'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
391 |
$this->_tisFile->writeInt((int)0xFFFFFFFD); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
392 |
$this->_tisFile->writeLong(0 /* dummy data for terms count */); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
393 |
$this->_tisFile->writeInt(self::$indexInterval); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
394 |
$this->_tisFile->writeInt(self::$skipInterval); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
395 |
$this->_tisFile->writeInt(self::$maxSkipLevels); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
396 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
397 |
$this->_tiiFile = $this->_directory->createFile($this->_name . '.tii'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
398 |
$this->_tiiFile->writeInt((int)0xFFFFFFFD); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
399 |
$this->_tiiFile->writeLong(0 /* dummy data for terms count */); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
400 |
$this->_tiiFile->writeInt(self::$indexInterval); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
401 |
$this->_tiiFile->writeInt(self::$skipInterval); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
402 |
$this->_tiiFile->writeInt(self::$maxSkipLevels); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
403 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
404 |
/** Dump dictionary header */ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
405 |
$this->_tiiFile->writeVInt(0); // preffix length |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
406 |
$this->_tiiFile->writeString(''); // suffix |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
407 |
$this->_tiiFile->writeInt((int)0xFFFFFFFF); // field number |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
408 |
$this->_tiiFile->writeByte((int)0x0F); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
409 |
$this->_tiiFile->writeVInt(0); // DocFreq |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
410 |
$this->_tiiFile->writeVInt(0); // FreqDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
411 |
$this->_tiiFile->writeVInt(0); // ProxDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
412 |
$this->_tiiFile->writeVInt(24); // IndexDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
413 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
414 |
$this->_frqFile = $this->_directory->createFile($this->_name . '.frq'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
415 |
$this->_prxFile = $this->_directory->createFile($this->_name . '.prx'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
416 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
417 |
$this->_files[] = $this->_name . '.tis'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
418 |
$this->_files[] = $this->_name . '.tii'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
419 |
$this->_files[] = $this->_name . '.frq'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
420 |
$this->_files[] = $this->_name . '.prx'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
421 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
422 |
$this->_prevTerm = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
423 |
$this->_prevTermInfo = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
424 |
$this->_prevIndexTerm = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
425 |
$this->_prevIndexTermInfo = null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
426 |
$this->_lastIndexPosition = 24; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
427 |
$this->_termCount = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
428 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
429 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
430 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
431 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
432 |
* Add term |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
433 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
434 |
* Term positions is an array( docId => array(pos1, pos2, pos3, ...), ... ) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
435 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
436 |
* @param Zend_Search_Lucene_Index_Term $termEntry |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
437 |
* @param array $termDocs |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
438 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
439 |
public function addTerm($termEntry, $termDocs) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
440 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
441 |
$freqPointer = $this->_frqFile->tell(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
442 |
$proxPointer = $this->_prxFile->tell(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
443 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
444 |
$prevDoc = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
445 |
foreach ($termDocs as $docId => $termPositions) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
446 |
$docDelta = ($docId - $prevDoc)*2; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
447 |
$prevDoc = $docId; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
448 |
if (count($termPositions) > 1) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
449 |
$this->_frqFile->writeVInt($docDelta); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
450 |
$this->_frqFile->writeVInt(count($termPositions)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
451 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
452 |
$this->_frqFile->writeVInt($docDelta + 1); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
453 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
454 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
455 |
$prevPosition = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
456 |
foreach ($termPositions as $position) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
457 |
$this->_prxFile->writeVInt($position - $prevPosition); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
458 |
$prevPosition = $position; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
459 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
460 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
461 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
462 |
if (count($termDocs) >= self::$skipInterval) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
463 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
464 |
* @todo Write Skip Data to a freq file. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
465 |
* It's not used now, but make index more optimal |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
466 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
467 |
$skipOffset = $this->_frqFile->tell() - $freqPointer; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
468 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
469 |
$skipOffset = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
470 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
471 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
472 |
$term = new Zend_Search_Lucene_Index_Term($termEntry->text, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
473 |
$this->_fields[$termEntry->field]->number); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
474 |
$termInfo = new Zend_Search_Lucene_Index_TermInfo(count($termDocs), |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
475 |
$freqPointer, $proxPointer, $skipOffset); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
476 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
477 |
$this->_dumpTermDictEntry($this->_tisFile, $this->_prevTerm, $term, $this->_prevTermInfo, $termInfo); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
478 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
479 |
if (($this->_termCount + 1) % self::$indexInterval == 0) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
480 |
$this->_dumpTermDictEntry($this->_tiiFile, $this->_prevIndexTerm, $term, $this->_prevIndexTermInfo, $termInfo); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
481 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
482 |
$indexPosition = $this->_tisFile->tell(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
483 |
$this->_tiiFile->writeVInt($indexPosition - $this->_lastIndexPosition); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
484 |
$this->_lastIndexPosition = $indexPosition; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
485 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
486 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
487 |
$this->_termCount++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
488 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
489 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
490 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
491 |
* Close dictionary |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
492 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
493 |
public function closeDictionaryFiles() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
494 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
495 |
$this->_tisFile->seek(4); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
496 |
$this->_tisFile->writeLong($this->_termCount); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
497 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
498 |
$this->_tiiFile->seek(4); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
499 |
// + 1 is used to count an additional special index entry (empty term at the start of the list) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
500 |
$this->_tiiFile->writeLong(($this->_termCount - $this->_termCount % self::$indexInterval)/self::$indexInterval + 1); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
501 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
502 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
503 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
504 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
505 |
* Dump Term Dictionary segment file entry. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
506 |
* Used to write entry to .tis or .tii files |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
507 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
508 |
* @param Zend_Search_Lucene_Storage_File $dicFile |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
509 |
* @param Zend_Search_Lucene_Index_Term $prevTerm |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
510 |
* @param Zend_Search_Lucene_Index_Term $term |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
511 |
* @param Zend_Search_Lucene_Index_TermInfo $prevTermInfo |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
512 |
* @param Zend_Search_Lucene_Index_TermInfo $termInfo |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
513 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
514 |
protected function _dumpTermDictEntry(Zend_Search_Lucene_Storage_File $dicFile, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
515 |
&$prevTerm, Zend_Search_Lucene_Index_Term $term, |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
516 |
&$prevTermInfo, Zend_Search_Lucene_Index_TermInfo $termInfo) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
517 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
518 |
if (isset($prevTerm) && $prevTerm->field == $term->field) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
519 |
$matchedBytes = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
520 |
$maxBytes = min(strlen($prevTerm->text), strlen($term->text)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
521 |
while ($matchedBytes < $maxBytes && |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
522 |
$prevTerm->text[$matchedBytes] == $term->text[$matchedBytes]) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
523 |
$matchedBytes++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
524 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
525 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
526 |
// Calculate actual matched UTF-8 pattern |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
527 |
$prefixBytes = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
528 |
$prefixChars = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
529 |
while ($prefixBytes < $matchedBytes) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
530 |
$charBytes = 1; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
531 |
if ((ord($term->text[$prefixBytes]) & 0xC0) == 0xC0) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
532 |
$charBytes++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
533 |
if (ord($term->text[$prefixBytes]) & 0x20 ) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
534 |
$charBytes++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
535 |
if (ord($term->text[$prefixBytes]) & 0x10 ) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
536 |
$charBytes++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
537 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
538 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
539 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
540 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
541 |
if ($prefixBytes + $charBytes > $matchedBytes) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
542 |
// char crosses matched bytes boundary |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
543 |
// skip char |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
544 |
break; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
545 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
546 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
547 |
$prefixChars++; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
548 |
$prefixBytes += $charBytes; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
549 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
550 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
551 |
// Write preffix length |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
552 |
$dicFile->writeVInt($prefixChars); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
553 |
// Write suffix |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
554 |
$dicFile->writeString(substr($term->text, $prefixBytes)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
555 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
556 |
// Write preffix length |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
557 |
$dicFile->writeVInt(0); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
558 |
// Write suffix |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
559 |
$dicFile->writeString($term->text); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
560 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
561 |
// Write field number |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
562 |
$dicFile->writeVInt($term->field); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
563 |
// DocFreq (the count of documents which contain the term) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
564 |
$dicFile->writeVInt($termInfo->docFreq); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
565 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
566 |
$prevTerm = $term; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
567 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
568 |
if (!isset($prevTermInfo)) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
569 |
// Write FreqDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
570 |
$dicFile->writeVInt($termInfo->freqPointer); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
571 |
// Write ProxDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
572 |
$dicFile->writeVInt($termInfo->proxPointer); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
573 |
} else { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
574 |
// Write FreqDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
575 |
$dicFile->writeVInt($termInfo->freqPointer - $prevTermInfo->freqPointer); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
576 |
// Write ProxDelta |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
577 |
$dicFile->writeVInt($termInfo->proxPointer - $prevTermInfo->proxPointer); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
578 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
579 |
// Write SkipOffset - it's not 0 when $termInfo->docFreq > self::$skipInterval |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
580 |
if ($termInfo->skipOffset != 0) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
581 |
$dicFile->writeVInt($termInfo->skipOffset); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
582 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
583 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
584 |
$prevTermInfo = $termInfo; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
585 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
586 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
587 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
588 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
589 |
* Generate compound index file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
590 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
591 |
protected function _generateCFS() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
592 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
593 |
$cfsFile = $this->_directory->createFile($this->_name . '.cfs'); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
594 |
$cfsFile->writeVInt(count($this->_files)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
595 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
596 |
$dataOffsetPointers = array(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
597 |
foreach ($this->_files as $fileName) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
598 |
$dataOffsetPointers[$fileName] = $cfsFile->tell(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
599 |
$cfsFile->writeLong(0); // write dummy data |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
600 |
$cfsFile->writeString($fileName); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
601 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
602 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
603 |
foreach ($this->_files as $fileName) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
604 |
// Get actual data offset |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
605 |
$dataOffset = $cfsFile->tell(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
606 |
// Seek to the data offset pointer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
607 |
$cfsFile->seek($dataOffsetPointers[$fileName]); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
608 |
// Write actual data offset value |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
609 |
$cfsFile->writeLong($dataOffset); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
610 |
// Seek back to the end of file |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
611 |
$cfsFile->seek($dataOffset); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
612 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
613 |
$dataFile = $this->_directory->getFileObject($fileName); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
614 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
615 |
$byteCount = $this->_directory->fileLength($fileName); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
616 |
while ($byteCount > 0) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
617 |
$data = $dataFile->readBytes(min($byteCount, 131072 /*128Kb*/)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
618 |
$byteCount -= strlen($data); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
619 |
$cfsFile->writeBytes($data); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
620 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
621 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
622 |
$this->_directory->deleteFile($fileName); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
623 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
624 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
625 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
626 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
627 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
628 |
* Close segment, write it to disk and return segment info |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
629 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
630 |
* @return Zend_Search_Lucene_Index_SegmentInfo |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
631 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
632 |
abstract public function close(); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
633 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
634 |