web/lib/Zend/Search/Lucene/Index/Term.php
changeset 64 162c1de6545a
parent 19 1c2f13fd785c
child 68 ecaf28ffe26e
equal deleted inserted replaced
63:5b37998e522e 64:162c1de6545a
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Search_Lucene
       
    17  * @subpackage Index
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 
       
    24 /**
       
    25  * A Term represents a word from text.  This is the unit of search.  It is
       
    26  * composed of two elements, the text of the word, as a string, and the name of
       
     27  * the field that the text occurred in, an interned string.
       
    28  *
       
    29  * Note that terms may represent more than words from text fields, but also
       
    30  * things like dates, email addresses, urls, etc.
       
    31  *
       
    32  * @category   Zend
       
    33  * @package    Zend_Search_Lucene
       
    34  * @subpackage Index
       
    35  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    36  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    37  */
       
    38 class Zend_Search_Lucene_Index_Term
       
    39 {
       
    40     /**
       
    41      * Field name or field number (depending from context)
       
    42      *
       
    43      * @var mixed
       
    44      */
       
    45     public $field;
       
    46 
       
    47     /**
       
    48      * Term value
       
    49      *
       
    50      * @var string
       
    51      */
       
    52     public $text;
       
    53 
       
    54 
       
    55     /**
       
    56      * Object constructor
       
    57      */
       
    58     public function __construct($text, $field = null)
       
    59     {
       
    60         $this->field = ($field === null)?  Zend_Search_Lucene::getDefaultSearchField() : $field;
       
    61         $this->text  = $text;
       
    62     }
       
    63 
       
    64 
       
    65     /**
       
    66      * Returns term key
       
    67      *
       
    68      * @return string
       
    69      */
       
    70     public function key()
       
    71     {
       
    72         return $this->field . chr(0) . $this->text;
       
    73     }
       
    74 
       
    75     /**
       
    76      * Get term prefix
       
    77      *
       
    78      * @param string $str
       
    79      * @param integer $length
       
    80      * @return string
       
    81      */
       
    82     public static function getPrefix($str, $length)
       
    83     {
       
    84         $prefixBytes = 0;
       
    85         $prefixChars = 0;
       
    86         while ($prefixBytes < strlen($str)  &&  $prefixChars < $length) {
       
    87             $charBytes = 1;
       
    88             if ((ord($str[$prefixBytes]) & 0xC0) == 0xC0) {
       
    89                 $charBytes++;
       
    90                 if (ord($str[$prefixBytes]) & 0x20 ) {
       
    91                     $charBytes++;
       
    92                     if (ord($str[$prefixBytes]) & 0x10 ) {
       
    93                         $charBytes++;
       
    94                     }
       
    95                 }
       
    96             }
       
    97 
       
    98             if ($prefixBytes + $charBytes > strlen($str)) {
       
    99                 // wrong character
       
   100                 break;
       
   101             }
       
   102 
       
   103             $prefixChars++;
       
   104             $prefixBytes += $charBytes;
       
   105         }
       
   106 
       
   107         return substr($str, 0, $prefixBytes);
       
   108     }
       
   109 
       
   110     /**
       
   111      * Get UTF-8 string length
       
   112      *
       
   113      * @param string $str
       
   114      * @return string
       
   115      */
       
   116     public static function getLength($str)
       
   117     {
       
   118         $bytes = 0;
       
   119         $chars = 0;
       
   120         while ($bytes < strlen($str)) {
       
   121             $charBytes = 1;
       
   122             if ((ord($str[$bytes]) & 0xC0) == 0xC0) {
       
   123                 $charBytes++;
       
   124                 if (ord($str[$bytes]) & 0x20 ) {
       
   125                     $charBytes++;
       
   126                     if (ord($str[$bytes]) & 0x10 ) {
       
   127                         $charBytes++;
       
   128                     }
       
   129                 }
       
   130             }
       
   131 
       
   132             if ($bytes + $charBytes > strlen($str)) {
       
   133                 // wrong character
       
   134                 break;
       
   135             }
       
   136 
       
   137             $chars++;
       
   138             $bytes += $charBytes;
       
   139         }
       
   140 
       
   141         return $chars;
       
   142     }
       
   143 }
       
   144