web/lib/Zend/Search/Lucene/Index/Term.php
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Mon, 19 Dec 2011 00:56:02 +0100
changeset 428 8b51d23b758f
parent 68 ecaf28ffe26e
child 207 621fa6caec0c
permissions -rw-r--r--
correct sync
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     1
<?php
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     2
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     3
 * Zend Framework
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     4
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     5
 * LICENSE
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     6
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
 * This source file is subject to the new BSD license that is bundled
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
 * with this package in the file LICENSE.txt.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     9
 * It is also available through the world-wide-web at this URL:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    10
 * http://framework.zend.com/license/new-bsd
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    11
 * If you did not receive a copy of the license and are unable to
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    12
 * obtain it through the world-wide-web, please send an email
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    13
 * to license@zend.com so we can send you a copy immediately.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    14
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    15
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    16
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    17
 * @subpackage Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    18
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    19
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    20
 * @version    $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    21
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    22
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    23
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    24
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    25
 * A Term represents a word from text.  This is the unit of search.  It is
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    26
 * composed of two elements, the text of the word, as a string, and the name of
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
 * the field that the text occured in, an interned string.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    29
 * Note that terms may represent more than words from text fields, but also
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    30
 * things like dates, email addresses, urls, etc.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    31
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
 * @subpackage Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    35
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    36
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    37
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    38
class Zend_Search_Lucene_Index_Term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    39
{
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    40
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    41
     * Field name or field number (depending from context)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    42
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    43
     * @var mixed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    44
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    45
    public $field;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    46
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    47
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    48
     * Term value
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    49
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    50
     * @var string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    51
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    52
    public $text;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    53
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    54
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    55
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    56
     * Object constructor
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    57
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
    public function __construct($text, $field = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    60
        $this->field = ($field === null)?  Zend_Search_Lucene::getDefaultSearchField() : $field;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    61
        $this->text  = $text;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
     * Returns term key
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
    public function key()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
        return $this->field . chr(0) . $this->text;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    75
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    76
     * Get term prefix
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    77
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    78
     * @param string $str
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    79
     * @param integer $length
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    80
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    81
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
    public static function getPrefix($str, $length)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
        $prefixBytes = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
        $prefixChars = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
        while ($prefixBytes < strlen($str)  &&  $prefixChars < $length) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
            $charBytes = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    88
            if ((ord($str[$prefixBytes]) & 0xC0) == 0xC0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    89
                $charBytes++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
                if (ord($str[$prefixBytes]) & 0x20 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    91
                    $charBytes++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
                    if (ord($str[$prefixBytes]) & 0x10 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
                        $charBytes++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    94
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
            if ($prefixBytes + $charBytes > strlen($str)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
                // wrong character
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
            $prefixChars++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
            $prefixBytes += $charBytes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   105
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   106
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   107
        return substr($str, 0, $prefixBytes);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   108
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   109
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   110
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   111
     * Get UTF-8 string length
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   112
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   113
     * @param string $str
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   114
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   115
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   116
    public static function getLength($str)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   117
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   118
        $bytes = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   119
        $chars = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   120
        while ($bytes < strlen($str)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   121
            $charBytes = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   122
            if ((ord($str[$bytes]) & 0xC0) == 0xC0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   123
                $charBytes++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   124
                if (ord($str[$bytes]) & 0x20 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   125
                    $charBytes++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   126
                    if (ord($str[$bytes]) & 0x10 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   127
                        $charBytes++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   128
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   129
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   130
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   131
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   132
            if ($bytes + $charBytes > strlen($str)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   133
                // wrong character
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   134
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   135
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   136
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   137
            $chars++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   138
            $bytes += $charBytes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   139
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   140
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   141
        return $chars;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   142
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   143
}
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   144