|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @subpackage Storage |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: Memory.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
21 */ |
|
22 |
|
23 /** Zend_Search_Lucene_Storage_File */ |
|
24 require_once 'Zend/Search/Lucene/Storage/File.php'; |
|
25 |
|
26 /** |
|
27 * @category Zend |
|
28 * @package Zend_Search_Lucene |
|
29 * @subpackage Storage |
|
30 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
31 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
32 */ |
|
class Zend_Search_Lucene_Storage_File_Memory extends Zend_Search_Lucene_Storage_File
{
    // In-memory implementation of the storage file API: the whole "file" is
    // kept in a PHP string. Reads are served from the current position;
    // every write appends to the end of the string and moves the position
    // to the new end.

    /**
     * File data (raw bytes)
     *
     * @var string
     */
    private $_data;

    /**
     * File position (byte offset into $_data)
     *
     * @var integer
     */
    private $_position = 0;


    /**
     * Object constructor
     *
     * @param string $data  Initial content of the in-memory file
     */
    public function __construct($data)
    {
        $this->_data = $data;
    }

    /**
     * Reads $length number of bytes at the current position in the
     * file and advances the file pointer.
     *
     * The position is always advanced by $length, even if fewer bytes
     * are available in the buffer.
     *
     * @param integer $length
     * @return string
     */
    protected function _fread($length = 1)
    {
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }


    /**
     * Sets the file position indicator.
     * The new position, measured in bytes from the beginning of the file,
     * is obtained by adding offset to the position specified by whence,
     * whose values are defined as follows:
     * SEEK_SET - Set position equal to offset bytes.
     * SEEK_CUR - Set position to current location plus offset.
     * SEEK_END - Set position to end-of-file plus offset. (To move to
     * a position before the end-of-file, you need to pass a negative value
     * in offset.)
     * Upon success, returns 0; otherwise (unknown $whence), returns -1
     * and leaves the position unchanged.
     *
     * @param integer $offset
     * @param integer $whence
     * @return integer
     */
    public function seek($offset, $whence=SEEK_SET)
    {
        switch ($whence) {
            case SEEK_SET:
                $this->_position = $offset;
                break;

            case SEEK_CUR:
                $this->_position += $offset;
                break;

            case SEEK_END:
                $this->_position = strlen($this->_data) + $offset;
                break;

            default:
                // Unknown $whence value: nothing is changed, report failure
                // as promised by the method contract.
                return -1;
        }

        return 0;
    }

    /**
     * Get file position.
     *
     * @return integer
     */
    public function tell()
    {
        return $this->_position;
    }

    /**
     * Flush output.
     *
     * Nothing is buffered for a memory file, so this always succeeds.
     *
     * @return boolean
     */
    public function flush()
    {
        // Do nothing

        return true;
    }

    /**
     * Writes $length number of bytes (all, if $length===null) to the end
     * of the file.
     *
     * @param string $data
     * @param integer $length
     */
    protected function _fwrite($data, $length=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($length !== null) {
            $this->_data .= substr($data, 0, $length);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }

    /**
     * Lock file
     *
     * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock).
     * Both arguments are ignored by this implementation.
     *
     * @param integer $lockType
     * @param boolean $nonBlockinLock
     * @return boolean
     */
    public function lock($lockType, $nonBlockinLock = false)
    {
        // Memory files can't be shared
        // do nothing

        return true;
    }

    /**
     * Unlock file
     */
    public function unlock()
    {
        // Memory files can't be shared
        // do nothing
    }

    /**
     * Reads a byte from the current position in the file
     * and advances the file pointer.
     *
     * @return integer
     */
    public function readByte()
    {
        return ord($this->_data[$this->_position++]);
    }

    /**
     * Writes a byte to the end of the file.
     *
     * @param integer $byte
     * @return integer  Always 1
     */
    public function writeByte($byte)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        $this->_data .= chr($byte);
        $this->_position = strlen($this->_data);

        return 1;
    }

    /**
     * Read num bytes from the current position in the file
     * and advances the file pointer.
     *
     * @param integer $num
     * @return string
     */
    public function readBytes($num)
    {
        $returnValue = substr($this->_data, $this->_position, $num);
        $this->_position += $num;

        return $returnValue;
    }

    /**
     * Writes num bytes of data (all, if $num===null) to the end
     * of the string.
     *
     * @param string $data
     * @param integer $num
     */
    public function writeBytes($data, $num=null)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        if ($num !== null) {
            $this->_data .= substr($data, 0, $num);
        } else {
            $this->_data .= $data;
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads an integer from the current position in the file
     * and advances the file pointer.
     *
     * The value is assembled from four bytes, most significant byte first.
     *
     * @return integer
     */
    public function readInt()
    {
        $str = substr($this->_data, $this->_position, 4);
        $this->_position += 4;

        return  ord($str[0]) << 24 |
                ord($str[1]) << 16 |
                ord($str[2]) << 8  |
                ord($str[3]);
    }


    /**
     * Writes an integer to the end of file.
     *
     * The value is stored as four bytes, most significant byte first.
     *
     * @param integer $value
     */
    public function writeInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        $this->_data .= chr($value>>24 & 0xFF) .
                        chr($value>>16 & 0xFF) .
                        chr($value>>8  & 0xFF) .
                        chr($value     & 0xFF);

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file
     * and advances the file pointer.
     *
     * Eight bytes are read, most significant byte first. When PHP integers
     * are not wider than 4 bytes the work is delegated to readLong32Bit().
     *
     * @return integer
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong()
    {
        /**
         * Check, that we work in 64-bit mode.
         * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            $str = substr($this->_data, $this->_position, 8);
            $this->_position += 8;

            return  ord($str[0]) << 56  |
                    ord($str[1]) << 48  |
                    ord($str[2]) << 40  |
                    ord($str[3]) << 32  |
                    ord($str[4]) << 24  |
                    ord($str[5]) << 16  |
                    ord($str[6]) << 8   |
                    ord($str[7]);
        } else {
            return $this->readLong32Bit();
        }
    }

    /**
     * Writes long integer to the end of file
     *
     * Eight bytes are written, most significant byte first. When PHP
     * integers are not wider than 4 bytes the work is delegated to
     * writeLong32Bit().
     *
     * @param integer $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        /**
         * Check, that we work in 64-bit mode.
         * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
         */
        if (PHP_INT_SIZE > 4) {
            settype($value, 'integer');
            $this->_data .= chr($value>>56 & 0xFF) .
                            chr($value>>48 & 0xFF) .
                            chr($value>>40 & 0xFF) .
                            chr($value>>32 & 0xFF) .
                            chr($value>>24 & 0xFF) .
                            chr($value>>16 & 0xFF) .
                            chr($value>>8  & 0xFF) .
                            chr($value     & 0xFF);
        } else {
            $this->writeLong32Bit($value);
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Returns a long integer from the current position in the file,
     * advances the file pointer and return it as float (for 32-bit platforms).
     *
     * The value is read as two 4-byte words (high word first). An integer is
     * returned whenever the high word does not contribute to the result,
     * otherwise the two words are combined into a float.
     *
     * @return integer|float
     * @throws Zend_Search_Lucene_Exception
     */
    public function readLong32Bit()
    {
        $wordHigh = $this->readInt();
        $wordLow  = $this->readInt();

        if ($wordHigh & (int)0x80000000) {
            // It's a negative value since the highest bit is set
            if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
                // High word is pure sign extension: the low word already
                // holds the complete negative value.
                return $wordLow;
            } else {
                require_once 'Zend/Search/Lucene/Exception.php';
                throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
            }

        }

        if ($wordLow < 0) {
            // Value is large than 0x7FFF FFFF. Represent low word as float.
            $wordLow &= 0x7FFFFFFF;
            $wordLow += (float)0x80000000;
        }

        if ($wordHigh == 0) {
            // Return value as integer if possible
            return $wordLow;
        }

        return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
    }


    /**
     * Writes long integer to the end of file (32-bit platforms implementation)
     *
     * The value is split into two 4-byte words which are written with
     * writeInt(), high word first.
     *
     * @param integer|float $value
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeLong32Bit($value)
    {
        if ($value < (int)0x80000000) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
        }

        if ($value < 0) {
            // Negative value: the high word is filled with sign bits
            $wordHigh = (int)0xFFFFFFFF;
            $wordLow  = (int)$value;
        } else {
            $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
            $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;

            if ($wordLow > 0x7FFFFFFF) {
                // Highest bit of low word is set. Translate it to the corresponding negative integer value
                $wordLow -= 0x80000000;
                $wordLow |= 0x80000000;
            }
        }

        $this->writeInt($wordHigh);
        $this->writeInt($wordLow);
    }

    /**
     * Returns a variable-length integer from the current
     * position in the file and advances the file pointer.
     *
     * Each byte carries seven value bits, lowest group first; a set high
     * bit (0x80) means that another byte follows.
     *
     * @return integer
     */
    public function readVInt()
    {
        $nextByte = ord($this->_data[$this->_position++]);
        $val = $nextByte & 0x7F;

        for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
            $nextByte = ord($this->_data[$this->_position++]);
            $val |= ($nextByte & 0x7F) << $shift;
        }
        return $val;
    }

    /**
     * Writes a variable-length integer to the end of file.
     *
     * Each byte carries seven value bits, lowest group first; the high
     * bit (0x80) is set on every byte except the last one.
     *
     * @param integer $value
     */
    public function writeVInt($value)
    {
        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        settype($value, 'integer');
        while ($value > 0x7F) {
            $this->_data .= chr( ($value & 0x7F)|0x80 );
            $value >>= 7;
        }
        $this->_data .= chr($value);

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads a string from the current position in the file
     * and advances the file pointer.
     *
     * The string is stored as a VInt character count followed by the
     * characters in Java 2 "modified UTF-8"; the two-byte sequence
     * \xC0\x80 is decoded to a single NUL byte.
     *
     * @return string
     */
    public function readString()
    {
        $strlen = $this->readVInt();
        if ($strlen == 0) {
            return '';
        } else {
            /**
             * This implementation supports only Basic Multilingual Plane
             * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
             * "supplementary characters" (characters whose code points are
             * greater than 0xFFFF)
             * Java 2 represents these characters as a pair of char (16-bit)
             * values, the first from the high-surrogates range (0xD800-0xDBFF),
             * the second from the low-surrogates range (0xDC00-0xDFFF). Then
             * they are encoded as usual UTF-8 characters in six bytes.
             * Standard UTF-8 representation uses four bytes for supplementary
             * characters.
             */

            // $strlen starts as the number of characters. One byte per
            // character is read up front; the loop below fetches the extra
            // bytes of every multi-byte character it meets, so $strlen
            // always equals the byte length of $str_val.
            $str_val = substr($this->_data, $this->_position, $strlen);
            $this->_position += $strlen;

            for ($count = 0; $count < $strlen; $count++ ) {
                if (( ord($str_val[$count]) & 0xC0 ) == 0xC0) {
                    $addBytes = 1;
                    if (ord($str_val[$count]) & 0x20 ) {
                        $addBytes++;

                        // Never used. Java2 doesn't encode strings in four bytes
                        if (ord($str_val[$count]) & 0x10 ) {
                            $addBytes++;
                        }
                    }
                    $str_val .= substr($this->_data, $this->_position, $addBytes);
                    $this->_position += $addBytes;
                    $strlen += $addBytes;

                    // Check for null character. Java2 encodes null character
                    // in two bytes.
                    if (ord($str_val[$count])   == 0xC0 &&
                        ord($str_val[$count+1]) == 0x80   ) {
                        // Assign a real NUL byte (assigning integer 0 would
                        // store the character "0").
                        $str_val[$count] = "\x00";
                        $str_val = substr($str_val,0,$count+1)
                                 . substr($str_val,$count+2);

                        // The sequence collapsed into a single byte: keep
                        // $strlen in sync with the string and do not skip
                        // the byte that moved into position $count+1.
                        $strlen--;
                    } else {
                        $count += $addBytes;
                    }
                }
            }

            return $str_val;
        }
    }

    /**
     * Writes a string to the end of file.
     *
     * The string is written as a VInt character count followed by its
     * bytes; NUL bytes are written as the two-byte sequence \xC0\x80.
     *
     * @param string $str
     * @throws Zend_Search_Lucene_Exception
     */
    public function writeString($str)
    {
        /**
         * This implementation supports only Basic Multilingual Plane
         * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
         * "supplementary characters" (characters whose code points are
         * greater than 0xFFFF)
         * Java 2 represents these characters as a pair of char (16-bit)
         * values, the first from the high-surrogates range (0xD800-0xDBFF),
         * the second from the low-surrogates range (0xDC00-0xDFFF). Then
         * they are encoded as usual UTF-8 characters in six bytes.
         * Standard UTF-8 representation uses four bytes for supplementary
         * characters.
         */

        // We do not need to check if file position points to the end of "file".
        // Only append operation is supported now

        // convert input to a string before iterating string characters
        settype($str, 'string');

        $chars = $strlen = strlen($str);
        $containNullChars = false;

        for ($count = 0; $count < $strlen; $count++ ) {
            /**
             * String is already in Java 2 representation.
             * We should only calculate actual string length and replace
             * \x00 by \xC0\x80
             */
            $byte = ord($str[$count]);

            if ($byte == 0) {
                // A NUL byte is a single character; it only has to be
                // re-encoded when the string is written.
                $containNullChars = true;
            } elseif (($byte & 0xC0) == 0xC0) {
                $addBytes = 1;
                if ($byte & 0x20 ) {
                    $addBytes++;

                    // Never used. Java2 doesn't encode strings in four bytes
                    // and we dont't support non-BMP characters
                    if ($byte & 0x10 ) {
                        $addBytes++;
                    }
                }

                // Continuation bytes belong to the same character
                $chars -= $addBytes;
                $count += $addBytes;
            }
        }

        if ($chars < 0) {
            require_once 'Zend/Search/Lucene/Exception.php';
            throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
        }

        $this->writeVInt($chars);
        if ($containNullChars) {
            // str_replace() takes (search, replace, subject)
            $this->_data .= str_replace("\x00", "\xC0\x80", $str);
        } else {
            $this->_data .= $str;
        }

        $this->_position = strlen($this->_data);
    }


    /**
     * Reads binary data from the current position in the file
     * and advances the file pointer.
     *
     * The data is stored as a VInt byte count followed by the bytes.
     *
     * @return string
     */
    public function readBinary()
    {
        $length = $this->readVInt();
        $returnValue = substr($this->_data, $this->_position, $length);
        $this->_position += $length;
        return $returnValue;
    }
}
|
601 |