<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: Field.php 20096 2010-01-06 02:05:09Z bkarwin $
 */


/**
 * A field is one named section of a Document, made of a name and a value.
 *
 * A value is either free text or an atomic keyword that is not processed
 * any further (handy for dates, urls and similar data). A field may
 * optionally be stored in the index so that it can be returned together
 * with the hits found on the document.
 *
 * Instances are normally obtained through the static factory methods
 * (keyword(), unIndexed(), binary(), text(), unStored()), each of which
 * calls the constructor with a fixed combination of flags.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Document
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Field
{
    /**
     * Name of the field.
     *
     * @var string
     */
    public $name;

    /**
     * Value of the field, kept exactly as it was given to the constructor.
     *
     * @var string
     */
    public $value;

    /**
     * Whether the field is stored in the index for return with search hits.
     *
     * @var boolean
     */
    public $isStored = false;

    /**
     * Whether the field is indexed, so that it may be searched on.
     *
     * @var boolean
     */
    public $isIndexed = true;

    /**
     * Whether the field is tokenized as text prior to indexing.
     *
     * @var boolean
     */
    public $isTokenized = true;

    /**
     * Whether the field is stored as binary.
     *
     * @var boolean
     */
    public $isBinary = false;

    /**
     * Whether the field is stored as a term vector.
     *
     * @var boolean
     */
    public $storeTermVector = false;

    /**
     * Boost factor of the field.
     * It is not stored directly in the index, but affects the normalization factor.
     *
     * @var float
     */
    public $boost = 1.0;

    /**
     * Encoding of the field value (an empty string for binary fields).
     *
     * @var string
     */
    public $encoding;

    /**
     * Object constructor
     *
     * For a binary field the $encoding and $isTokenized arguments are
     * ignored: the encoding becomes an empty string and tokenization is
     * switched off. The term vector flag and the boost factor always start
     * at false and 1.0 respectively.
     *
     * @param string  $name
     * @param string  $value
     * @param string  $encoding
     * @param boolean $isStored
     * @param boolean $isIndexed
     * @param boolean $isTokenized
     * @param boolean $isBinary
     */
    public function __construct($name, $value, $encoding, $isStored, $isIndexed, $isTokenized, $isBinary = false)
    {
        $this->name      = $name;
        $this->value     = $value;
        $this->isStored  = $isStored;
        $this->isIndexed = $isIndexed;
        $this->isBinary  = $isBinary;

        // Binary values carry no character encoding and are never tokenized,
        // whatever the caller asked for.
        $this->encoding    = $isBinary ? ''    : $encoding;
        $this->isTokenized = $isBinary ? false : $isTokenized;

        $this->storeTermVector = false;
        $this->boost           = 1.0;
    }


    /**
     * Builds a string-valued field that is stored and indexed, but not
     * tokenized. Useful for non-text fields, e.g. date or url.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function keyword($name, $value, $encoding = '')
    {
        $stored    = true;
        $indexed   = true;
        $tokenized = false;

        return new self($name, $value, $encoding, $stored, $indexed, $tokenized);
    }


    /**
     * Builds a string-valued field that is stored in the index for return
     * with hits, but is neither indexed nor tokenized.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unIndexed($name, $value, $encoding = '')
    {
        $stored    = true;
        $indexed   = false;
        $tokenized = false;

        return new self($name, $value, $encoding, $stored, $indexed, $tokenized);
    }


    /**
     * Builds a binary-string-valued field that is stored in the index for
     * return with hits, but is neither indexed nor tokenized. The field is
     * created with an empty encoding.
     *
     * @param string $name
     * @param string $value
     * @return Zend_Search_Lucene_Field
     */
    public static function binary($name, $value)
    {
        $stored    = true;
        $indexed   = false;
        $tokenized = false;
        $binary    = true;

        return new self($name, $value, '', $stored, $indexed, $tokenized, $binary);
    }

    /**
     * Builds a string-valued field that is tokenized, indexed and stored in
     * the index for return with hits. Useful for short text fields, like
     * "title" or "subject". Term vector will not be stored for this field.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function text($name, $value, $encoding = '')
    {
        $stored    = true;
        $indexed   = true;
        $tokenized = true;

        return new self($name, $value, $encoding, $stored, $indexed, $tokenized);
    }


    /**
     * Builds a string-valued field that is tokenized and indexed, but is
     * not stored in the index.
     *
     * @param string $name
     * @param string $value
     * @param string $encoding
     * @return Zend_Search_Lucene_Field
     */
    public static function unStored($name, $value, $encoding = '')
    {
        $stored    = false;
        $indexed   = true;
        $tokenized = true;

        return new self($name, $value, $encoding, $stored, $indexed, $tokenized);
    }

    /**
     * Get field value in UTF-8 encoding
     *
     * When the field encoding is "utf8" or "utf-8" (compared without regard
     * to case) the value is returned untouched. Any other encoding is
     * converted with iconv(), whose result is returned as-is.
     *
     * @return string
     */
    public function getUtf8Value()
    {
        // Already UTF-8 under either spelling: nothing to convert.
        foreach (array('utf8', 'utf-8') as $utf8Alias) {
            if (strcasecmp($this->encoding, $utf8Alias) == 0) {
                return $this->value;
            }
        }

        // On AIX the field encoding is not handed to iconv(); the value is
        // always converted from ISO8859-1 there.
        $sourceEncoding = (PHP_OS != 'AIX') ? $this->encoding : 'ISO8859-1';

        return iconv($sourceEncoding, 'UTF-8', $this->value);
    }
}