|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @subpackage Storage |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: File.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
21 */ |
|
22 |
|
23 /** |
|
24 * @category Zend |
|
25 * @package Zend_Search_Lucene |
|
26 * @subpackage Storage |
|
27 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
28 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
29 */ |
|
abstract class Zend_Search_Lucene_Storage_File
{
    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * @param integer $length
     * @return string
     */
    abstract protected function _fread($length=1);


    /**
     * Sets the file position indicator and advances the file pointer.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise, returns -1
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    abstract public function seek($offset, $whence=SEEK_SET);

    /**
     * Get file position.
     *
     * @return integer
     */
    abstract public function tell();

    /**
     * Flush output.
     *
     * Returns true on success or false on failure.
     *
     * @return boolean
     */
    abstract public function flush();

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    abstract protected function _fwrite($data, $length=null);

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock
     * @return boolean
     */
    abstract public function lock($lockType, $nonBlockinLock = false);

    /**
     * Unlock file
     */
    abstract public function unlock();

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_fread(1));
    }

    /**
     * Writes a byte to the end of the file.
     *
     * Whatever _fwrite() returns is passed back to the caller.
     *
     * @param integer $byte
     */
    public function writeByte($byte)
    {
        return $this->_fwrite(chr($byte), 1);
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        return $this->_fread($num);
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        $this->_fwrite($data, $num);
    }


    /**
     * Reads a big-endian 32-bit integer from the current position in the
     * file and advances the file pointer.
     *
     * No sign extension is performed: where PHP integers are wider than
     * 32 bits the result is in the range 0..0xFFFFFFFF, while on 32-bit
     * builds a set high bit produces a negative integer.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = $this->_fread(4);

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes the low 32 bits of an integer to the end of file,
     * most significant byte first.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        settype($value, 'integer');
        $this->_fwrite( chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF),   4  );
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = $this->_fread(8);

            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            return $this->readLong32Bit();
        }
    }

    /**
     * Writes long integer to the end of file
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_fwrite( chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF),   8  );
        } else {
            $this->writeLong32Bit($value);
        }
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * The value is read as two 32-bit words (high word first). Values that
     * fit into an integer are returned as integer, larger ones as float.
     * Negative values are supported only down to -2147483648.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                // readInt() does not sign-extend where integers are wider than
                // 32 bits, so the low word arrives as an unsigned number there.
                // Fold it back into the negative range. On 32-bit builds the
                // word is already negative and this branch is never taken.
                if ($wordLow > 0x7FFFFFFF) {
                    $wordLow -= 0x100000000;
                }

                return $wordLow;
            } else {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
            }

        }

        if ($wordLow < 0) {
            // Value is large than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * The value is written as two 32-bit words, high word first.
     *
     * @param integer|float $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong32Bit($value)
    {
        // Compare against the numeric lower bound itself. (int)0x80000000 is
        // only negative on 32-bit builds; elsewhere it is +2147483648 and
        // would reject every value below 2^31.
        if ($value < -2147483648.0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($value < 0) {
            // Negative values in the supported range: high word is all ones
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }


    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte contributes its low seven bits, least significant group
     * first; a set high bit means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_fread(1));
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_fread(1));
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * The value is emitted seven bits at a time, least significant group
     * first, with the high bit of a byte set while more bytes follow.
     * The loop only runs for values above 0x7F, so negative values are not
     * encoded in a form readVInt() can decode; pass non-negative values.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
            $value >>= 7;
        }
        $this->_fwrite(chr($value));
    }


    /**
     * Returns the number of bytes that follow a UTF-8 lead byte.
     *
     * Expects a byte whose two highest bits are set. Bits 0x20 and 0x10
     * select a three- and a four-byte sequence respectively.
     *
     * @param integer $leadByte
     * @return integer
     */
    private static function _countTrailingBytes($leadByte)
    {
        $addBytes = 1;
        if ($leadByte & 0x20) {
            $addBytes++;

            // Never used. Java2 doesn't encode strings in four bytes
            // and we don't support non-BMP characters
            if ($leadByte & 0x10) {
                $addBytes++;
            }
        }

        return $addBytes;
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The stored form is a VInt character count followed by the encoded
     * characters. The two-byte sequence \xC0\x80 is decoded to a single
     * NUL byte; everything else is returned as read.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        }

        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // Start with one byte per character; every lead byte found below
        // tells how many more bytes have to be fetched.
        $str_val = $this->_fread($strlen);

        for ($count = 0; $count < $strlen; $count++ ) {
            $byte = ord($str_val[$count]);
            if (($byte & 0xC0) != 0xC0) {
                // Single-byte character
                continue;
            }

            $addBytes = self::_countTrailingBytes($byte);
            $str_val .= $this->_fread($addBytes);
            $strlen  += $addBytes;

            // Check for null character. Java2 encodes null character
            // in two bytes.
            if ($byte == 0xC0  &&  ord($str_val[$count+1]) == 0x80) {
                // Collapse the pair into one real NUL byte. The string got
                // one byte shorter, so the loop bound shrinks as well and
                // $count must not skip ahead: the next character now sits
                // directly behind the NUL.
                $str_val = substr($str_val, 0, $count)
                         . "\x00"
                         . substr($str_val, $count+2);
                $strlen--;
            } else {
                // Skip the continuation bytes of this character
                $count += $addBytes;
            }
        }

        return $str_val;
    }

    /**
     * Writes a string to the end of file.
     *
     * Writes the character count as VInt followed by the string bytes.
     * NUL bytes are written as the two-byte sequence \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);

        for ($count = 0; $count < $strlen; $count++ ) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $byte = ord($str[$count]);
            if (($byte & 0xC0) == 0xC0) {
                // Continuation bytes belong to the same character:
                // take them out of the character count and step over them.
                $addBytes = self::_countTrailingBytes($byte);
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);

        // A NUL is a single-byte character, so it can never show up as a
        // lead byte in the loop above; look for it in the whole string.
        // The character count is not affected by the two-byte encoding.
        if (strpos($str, "\x00") !== false) {
            $this->_fwrite(str_replace("\x00", "\xC0\x80", $str));
        } else {
            $this->_fwrite($str);
        }
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is stored as a VInt byte count followed by that many bytes.
     *
     * @return string
     */
    public function readBinary()
    {
        return $this->_fread($this->readVInt());
    }
}