|
1 <?php |
|
2 /** |
|
3 * Zend Framework |
|
4 * |
|
5 * LICENSE |
|
6 * |
|
7 * This source file is subject to the new BSD license that is bundled |
|
8 * with this package in the file LICENSE.txt. |
|
9 * It is also available through the world-wide-web at this URL: |
|
10 * http://framework.zend.com/license/new-bsd |
|
11 * If you did not receive a copy of the license and are unable to |
|
12 * obtain it through the world-wide-web, please send an email |
|
13 * to license@zend.com so we can send you a copy immediately. |
|
14 * |
|
15 * @category Zend |
|
16 * @package Zend_Search_Lucene |
|
17 * @subpackage Index |
|
18 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
19 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
20 * @version $Id: Term.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
21 */ |
|
22 |
|
23 |
|
24 /** |
|
25 * A Term represents a word from text. This is the unit of search. It is |
|
26 * composed of two elements, the text of the word, as a string, and the name of |
|
27 * the field that the text occurred in, an interned string. |
|
28 * |
|
29 * Note that terms may represent more than words from text fields, but also |
|
30 * things like dates, email addresses, urls, etc. |
|
31 * |
|
32 * @category Zend |
|
33 * @package Zend_Search_Lucene |
|
34 * @subpackage Index |
|
35 * @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
36 * @license http://framework.zend.com/license/new-bsd New BSD License |
|
37 */ |
|
class Zend_Search_Lucene_Index_Term
{
    /**
     * Field name or field number (depending on context)
     *
     * @var mixed
     */
    public $field;

    /**
     * Term value
     *
     * @var string
     */
    public $text;


    /**
     * Object constructor
     *
     * @param string $text  Term value
     * @param mixed  $field Field name or field number; when null, the value
     *                      returned by Zend_Search_Lucene::getDefaultSearchField()
     *                      is stored instead
     */
    public function __construct($text, $field = null)
    {
        $this->field = ($field === null)? Zend_Search_Lucene::getDefaultSearchField() : $field;
        $this->text  = $text;
    }


    /**
     * Returns term key
     *
     * The key is the field and the text joined by a single NUL byte.
     *
     * @return string
     */
    public function key()
    {
        return $this->field . chr(0) . $this->text;
    }

    /**
     * Get term prefix
     *
     * Returns the leading part of $str holding at most $length characters,
     * where a character is a UTF-8 sequence whose size is derived from its
     * first byte. A trailing sequence that would run past the end of the
     * string is not included in the result.
     *
     * @param string $str
     * @param integer $length Maximum number of characters in the prefix
     * @return string
     */
    public static function getPrefix($str, $length)
    {
        // The byte length does not change while scanning, so compute it once.
        $totalBytes  = strlen($str);
        $prefixBytes = 0;
        $prefixChars = 0;

        while ($prefixBytes < $totalBytes && $prefixChars < $length) {
            $charBytes = self::_getCharByteLength($str[$prefixBytes]);

            if ($prefixBytes + $charBytes > $totalBytes) {
                // wrong character: the sequence is cut off by the end of the string
                break;
            }

            $prefixChars++;
            $prefixBytes += $charBytes;
        }

        return substr($str, 0, $prefixBytes);
    }

    /**
     * Get UTF-8 string length
     *
     * Counts characters rather than bytes. Counting stops at a trailing
     * sequence that would run past the end of the string; that sequence is
     * not counted.
     *
     * @param string $str
     * @return integer Number of characters found
     */
    public static function getLength($str)
    {
        // The byte length does not change while scanning, so compute it once.
        $totalBytes = strlen($str);
        $bytes      = 0;
        $chars      = 0;

        while ($bytes < $totalBytes) {
            $charBytes = self::_getCharByteLength($str[$bytes]);

            if ($bytes + $charBytes > $totalBytes) {
                // wrong character: the sequence is cut off by the end of the string
                break;
            }

            $chars++;
            $bytes += $charBytes;
        }

        return $chars;
    }

    /**
     * Get the byte size of a UTF-8 sequence from its first byte
     *
     * Bytes whose two highest bits are not both set (ASCII bytes and stray
     * continuation bytes) count as one-byte characters. Otherwise the 0x20
     * and 0x10 bits select a two-, three- or four-byte sequence; any byte
     * with all of those bits set is treated as a four-byte lead.
     *
     * @param string $leadByte First byte of the sequence
     * @return integer Sequence size in bytes (1 to 4)
     */
    private static function _getCharByteLength($leadByte)
    {
        $code = ord($leadByte);

        if (($code & 0xC0) != 0xC0) {
            return 1;
        }
        if (!($code & 0x20)) {
            return 2;
        }
        if (!($code & 0x10)) {
            return 3;
        }

        return 4;
    }
}
|
144 |