web/lib/Zend/Search/Lucene/Storage/File.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Search_Lucene
       
    17  * @subpackage Storage
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: File.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 /**
       
    24  * @category   Zend
       
    25  * @package    Zend_Search_Lucene
       
    26  * @subpackage Storage
       
    27  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    28  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    29  */
       
    30 abstract class Zend_Search_Lucene_Storage_File
       
    31 {
       
    32     /**
       
    33      * Reads $length number of bytes at the current position in the
       
    34      * file and advances the file pointer.
       
    35      *
       
    36      * @param integer $length
       
    37      * @return string
       
    38      */
       
    39     abstract protected function _fread($length=1);
       
    40 
       
    41 
       
    42     /**
       
    43      * Sets the file position indicator and advances the file pointer.
       
    44      * The new position, measured in bytes from the beginning of the file,
       
    45      * is obtained by adding offset to the position specified by whence,
       
    46      * whose values are defined as follows:
       
    47      * SEEK_SET - Set position equal to offset bytes.
       
    48      * SEEK_CUR - Set position to current location plus offset.
       
    49      * SEEK_END - Set position to end-of-file plus offset. (To move to
       
    50      * a position before the end-of-file, you need to pass a negative value
       
    51      * in offset.)
       
    52      * Upon success, returns 0; otherwise, returns -1
       
    53      *
       
    54      * @param integer $offset
       
    55      * @param integer $whence
       
    56      * @return integer
       
    57      */
       
    58     abstract public function seek($offset, $whence=SEEK_SET);
       
    59 
       
    60     /**
       
    61      * Get file position.
       
    62      *
       
    63      * @return integer
       
    64      */
       
    65     abstract public function tell();
       
    66 
       
    67     /**
       
    68      * Flush output.
       
    69      *
       
    70      * Returns true on success or false on failure.
       
    71      *
       
    72      * @return boolean
       
    73      */
       
    74     abstract public function flush();
       
    75 
       
    76     /**
       
    77      * Writes $length number of bytes (all, if $length===null) to the end
       
    78      * of the file.
       
    79      *
       
    80      * @param string $data
       
    81      * @param integer $length
       
    82      */
       
    83     abstract protected function _fwrite($data, $length=null);
       
    84 
       
    85     /**
       
    86      * Lock file
       
    87      *
       
    88      * Lock type may be a LOCK_SH (shared lock) or a LOCK_EX (exclusive lock)
       
    89      *
       
    90      * @param integer $lockType
       
    91      * @return boolean
       
    92      */
       
    93     abstract public function lock($lockType, $nonBlockinLock = false);
       
    94 
       
    95     /**
       
    96      * Unlock file
       
    97      */
       
    98     abstract public function unlock();
       
    99 
       
   100     /**
       
   101      * Reads a byte from the current position in the file
       
   102      * and advances the file pointer.
       
   103      *
       
   104      * @return integer
       
   105      */
       
   106     public function readByte()
       
   107     {
       
   108         return ord($this->_fread(1));
       
   109     }
       
   110 
       
   111     /**
       
   112      * Writes a byte to the end of the file.
       
   113      *
       
   114      * @param integer $byte
       
   115      */
       
   116     public function writeByte($byte)
       
   117     {
       
   118         return $this->_fwrite(chr($byte), 1);
       
   119     }
       
   120 
       
   121     /**
       
   122      * Read num bytes from the current position in the file
       
   123      * and advances the file pointer.
       
   124      *
       
   125      * @param integer $num
       
   126      * @return string
       
   127      */
       
   128     public function readBytes($num)
       
   129     {
       
   130         return $this->_fread($num);
       
   131     }
       
   132 
       
   133     /**
       
   134      * Writes num bytes of data (all, if $num===null) to the end
       
   135      * of the string.
       
   136      *
       
   137      * @param string $data
       
   138      * @param integer $num
       
   139      */
       
   140     public function writeBytes($data, $num=null)
       
   141     {
       
   142         $this->_fwrite($data, $num);
       
   143     }
       
   144 
       
   145 
       
   146     /**
       
   147      * Reads an integer from the current position in the file
       
   148      * and advances the file pointer.
       
   149      *
       
   150      * @return integer
       
   151      */
       
   152     public function readInt()
       
   153     {
       
   154         $str = $this->_fread(4);
       
   155 
       
   156         return  ord($str[0]) << 24 |
       
   157                 ord($str[1]) << 16 |
       
   158                 ord($str[2]) << 8  |
       
   159                 ord($str[3]);
       
   160     }
       
   161 
       
   162 
       
   163     /**
       
   164      * Writes an integer to the end of file.
       
   165      *
       
   166      * @param integer $value
       
   167      */
       
   168     public function writeInt($value)
       
   169     {
       
   170         settype($value, 'integer');
       
   171         $this->_fwrite( chr($value>>24 & 0xFF) .
       
   172                         chr($value>>16 & 0xFF) .
       
   173                         chr($value>>8  & 0xFF) .
       
   174                         chr($value     & 0xFF),   4  );
       
   175     }
       
   176 
       
   177 
       
   178     /**
       
   179      * Returns a long integer from the current position in the file
       
   180      * and advances the file pointer.
       
   181      *
       
   182      * @return integer|float
       
   183      * @throws Zend_Search_Lucene_Exception
       
   184      */
       
   185     public function readLong()
       
   186     {
       
   187         /**
       
   188          * Check, that we work in 64-bit mode.
       
   189          * fseek() uses long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
       
   190          */
       
   191         if (PHP_INT_SIZE > 4) {
       
   192             $str = $this->_fread(8);
       
   193 
       
   194             return  ord($str[0]) << 56  |
       
   195                     ord($str[1]) << 48  |
       
   196                     ord($str[2]) << 40  |
       
   197                     ord($str[3]) << 32  |
       
   198                     ord($str[4]) << 24  |
       
   199                     ord($str[5]) << 16  |
       
   200                     ord($str[6]) << 8   |
       
   201                     ord($str[7]);
       
   202         } else {
       
   203             return $this->readLong32Bit();
       
   204         }
       
   205     }
       
   206 
       
   207     /**
       
   208      * Writes long integer to the end of file
       
   209      *
       
   210      * @param integer $value
       
   211      * @throws Zend_Search_Lucene_Exception
       
   212      */
       
   213     public function writeLong($value)
       
   214     {
       
   215         /**
       
   216          * Check, that we work in 64-bit mode.
       
   217          * fseek() and ftell() use long for offset. Thus, largest index segment file size in 32bit mode is 2Gb
       
   218          */
       
   219         if (PHP_INT_SIZE > 4) {
       
   220             settype($value, 'integer');
       
   221             $this->_fwrite( chr($value>>56 & 0xFF) .
       
   222                             chr($value>>48 & 0xFF) .
       
   223                             chr($value>>40 & 0xFF) .
       
   224                             chr($value>>32 & 0xFF) .
       
   225                             chr($value>>24 & 0xFF) .
       
   226                             chr($value>>16 & 0xFF) .
       
   227                             chr($value>>8  & 0xFF) .
       
   228                             chr($value     & 0xFF),   8  );
       
   229         } else {
       
   230             $this->writeLong32Bit($value);
       
   231         }
       
   232     }
       
   233 
       
   234 
       
   235     /**
       
   236      * Returns a long integer from the current position in the file,
       
   237      * advances the file pointer and return it as float (for 32-bit platforms).
       
   238      *
       
   239      * @return integer|float
       
   240      * @throws Zend_Search_Lucene_Exception
       
   241      */
       
   242     public function readLong32Bit()
       
   243     {
       
   244         $wordHigh = $this->readInt();
       
   245         $wordLow  = $this->readInt();
       
   246 
       
   247         if ($wordHigh & (int)0x80000000) {
       
   248             // It's a negative value since the highest bit is set
       
   249             if ($wordHigh == (int)0xFFFFFFFF  &&  ($wordLow & (int)0x80000000)) {
       
   250                 return $wordLow;
       
   251             } else {
       
   252                 require_once 'Zend/Search/Lucene/Exception.php';
       
   253                 throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
       
   254             }
       
   255 
       
   256         }
       
   257 
       
   258         if ($wordLow < 0) {
       
   259             // Value is large than 0x7FFF FFFF. Represent low word as float.
       
   260             $wordLow &= 0x7FFFFFFF;
       
   261             $wordLow += (float)0x80000000;
       
   262         }
       
   263 
       
   264         if ($wordHigh == 0) {
       
   265             // Return value as integer if possible
       
   266             return $wordLow;
       
   267         }
       
   268 
       
   269         return $wordHigh*(float)0x100000000/* 0x00000001 00000000 */ + $wordLow;
       
   270     }
       
   271 
       
   272 
       
   273     /**
       
   274      * Writes long integer to the end of file (32-bit platforms implementation)
       
   275      *
       
   276      * @param integer|float $value
       
   277      * @throws Zend_Search_Lucene_Exception
       
   278      */
       
   279     public function writeLong32Bit($value)
       
   280     {
       
   281         if ($value < (int)0x80000000) {
       
   282             require_once 'Zend/Search/Lucene/Exception.php';
       
   283             throw new Zend_Search_Lucene_Exception('Long integers lower than -2147483648 (0x80000000) are not supported on 32-bit platforms.');
       
   284         }
       
   285 
       
   286         if ($value < 0) {
       
   287             $wordHigh = (int)0xFFFFFFFF;
       
   288             $wordLow  = (int)$value;
       
   289         } else {
       
   290             $wordHigh = (int)($value/(float)0x100000000/* 0x00000001 00000000 */);
       
   291             $wordLow  = $value - $wordHigh*(float)0x100000000/* 0x00000001 00000000 */;
       
   292 
       
   293             if ($wordLow > 0x7FFFFFFF) {
       
   294                 // Highest bit of low word is set. Translate it to the corresponding negative integer value
       
   295                 $wordLow -= 0x80000000;
       
   296                 $wordLow |= 0x80000000;
       
   297             }
       
   298         }
       
   299 
       
   300         $this->writeInt($wordHigh);
       
   301         $this->writeInt($wordLow);
       
   302     }
       
   303 
       
   304 
       
   305     /**
       
   306      * Returns a variable-length integer from the current
       
   307      * position in the file and advances the file pointer.
       
   308      *
       
   309      * @return integer
       
   310      */
       
   311     public function readVInt()
       
   312     {
       
   313         $nextByte = ord($this->_fread(1));
       
   314         $val = $nextByte & 0x7F;
       
   315 
       
   316         for ($shift=7; ($nextByte & 0x80) != 0; $shift += 7) {
       
   317             $nextByte = ord($this->_fread(1));
       
   318             $val |= ($nextByte & 0x7F) << $shift;
       
   319         }
       
   320         return $val;
       
   321     }
       
   322 
       
   323     /**
       
   324      * Writes a variable-length integer to the end of file.
       
   325      *
       
   326      * @param integer $value
       
   327      */
       
   328     public function writeVInt($value)
       
   329     {
       
   330         settype($value, 'integer');
       
   331         while ($value > 0x7F) {
       
   332             $this->_fwrite(chr( ($value & 0x7F)|0x80 ));
       
   333             $value >>= 7;
       
   334         }
       
   335         $this->_fwrite(chr($value));
       
   336     }
       
   337 
       
   338 
       
   339     /**
       
   340      * Reads a string from the current position in the file
       
   341      * and advances the file pointer.
       
   342      *
       
   343      * @return string
       
   344      */
       
   345     public function readString()
       
   346     {
       
   347         $strlen = $this->readVInt();
       
   348         if ($strlen == 0) {
       
   349             return '';
       
   350         } else {
       
   351             /**
       
   352              * This implementation supports only Basic Multilingual Plane
       
   353              * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
       
   354              * "supplementary characters" (characters whose code points are
       
   355              * greater than 0xFFFF)
       
   356              * Java 2 represents these characters as a pair of char (16-bit)
       
   357              * values, the first from the high-surrogates range (0xD800-0xDBFF),
       
   358              * the second from the low-surrogates range (0xDC00-0xDFFF). Then
       
   359              * they are encoded as usual UTF-8 characters in six bytes.
       
   360              * Standard UTF-8 representation uses four bytes for supplementary
       
   361              * characters.
       
   362              */
       
   363 
       
   364             $str_val = $this->_fread($strlen);
       
   365 
       
   366             for ($count = 0; $count < $strlen; $count++ ) {
       
   367                 if (( ord($str_val[$count]) & 0xC0 ) == 0xC0) {
       
   368                     $addBytes = 1;
       
   369                     if (ord($str_val[$count]) & 0x20 ) {
       
   370                         $addBytes++;
       
   371 
       
   372                         // Never used. Java2 doesn't encode strings in four bytes
       
   373                         if (ord($str_val[$count]) & 0x10 ) {
       
   374                             $addBytes++;
       
   375                         }
       
   376                     }
       
   377                     $str_val .= $this->_fread($addBytes);
       
   378                     $strlen += $addBytes;
       
   379 
       
   380                     // Check for null character. Java2 encodes null character
       
   381                     // in two bytes.
       
   382                     if (ord($str_val[$count])   == 0xC0 &&
       
   383                         ord($str_val[$count+1]) == 0x80   ) {
       
   384                         $str_val[$count] = 0;
       
   385                         $str_val = substr($str_val,0,$count+1)
       
   386                                  . substr($str_val,$count+2);
       
   387                     }
       
   388                     $count += $addBytes;
       
   389                 }
       
   390             }
       
   391 
       
   392             return $str_val;
       
   393         }
       
   394     }
       
   395 
       
   396     /**
       
   397      * Writes a string to the end of file.
       
   398      *
       
   399      * @param string $str
       
   400      * @throws Zend_Search_Lucene_Exception
       
   401      */
       
   402     public function writeString($str)
       
   403     {
       
   404         /**
       
   405          * This implementation supports only Basic Multilingual Plane
       
   406          * (BMP) characters (from 0x0000 to 0xFFFF) and doesn't support
       
   407          * "supplementary characters" (characters whose code points are
       
   408          * greater than 0xFFFF)
       
   409          * Java 2 represents these characters as a pair of char (16-bit)
       
   410          * values, the first from the high-surrogates range (0xD800-0xDBFF),
       
   411          * the second from the low-surrogates range (0xDC00-0xDFFF). Then
       
   412          * they are encoded as usual UTF-8 characters in six bytes.
       
   413          * Standard UTF-8 representation uses four bytes for supplementary
       
   414          * characters.
       
   415          */
       
   416 
       
   417         // convert input to a string before iterating string characters
       
   418         settype($str, 'string');
       
   419 
       
   420         $chars = $strlen = strlen($str);
       
   421         $containNullChars = false;
       
   422 
       
   423         for ($count = 0; $count < $strlen; $count++ ) {
       
   424             /**
       
   425              * String is already in Java 2 representation.
       
   426              * We should only calculate actual string length and replace
       
   427              * \x00 by \xC0\x80
       
   428              */
       
   429             if ((ord($str[$count]) & 0xC0) == 0xC0) {
       
   430                 $addBytes = 1;
       
   431                 if (ord($str[$count]) & 0x20 ) {
       
   432                     $addBytes++;
       
   433 
       
   434                     // Never used. Java2 doesn't encode strings in four bytes
       
   435                     // and we dont't support non-BMP characters
       
   436                     if (ord($str[$count]) & 0x10 ) {
       
   437                         $addBytes++;
       
   438                     }
       
   439                 }
       
   440                 $chars -= $addBytes;
       
   441 
       
   442                 if (ord($str[$count]) == 0 ) {
       
   443                     $containNullChars = true;
       
   444                 }
       
   445                 $count += $addBytes;
       
   446             }
       
   447         }
       
   448 
       
   449         if ($chars < 0) {
       
   450             require_once 'Zend/Search/Lucene/Exception.php';
       
   451             throw new Zend_Search_Lucene_Exception('Invalid UTF-8 string');
       
   452         }
       
   453 
       
   454         $this->writeVInt($chars);
       
   455         if ($containNullChars) {
       
   456             $this->_fwrite(str_replace($str, "\x00", "\xC0\x80"));
       
   457         } else {
       
   458             $this->_fwrite($str);
       
   459         }
       
   460     }
       
   461 
       
   462 
       
   463     /**
       
   464      * Reads binary data from the current position in the file
       
   465      * and advances the file pointer.
       
   466      *
       
   467      * @return string
       
   468      */
       
   469     public function readBinary()
       
   470     {
       
   471         return $this->_fread($this->readVInt());
       
   472     }
       
   473 }