<?php
/**
 * Zend Framework
 *
 * LICENSE
 *
 * This source file is subject to the new BSD license that is bundled
 * with this package in the file LICENSE.txt.
 * It is also available through the world-wide-web at this URL:
 * http://framework.zend.com/license/new-bsd
 * If you did not receive a copy of the license and are unable to
 * obtain it through the world-wide-web, please send an email
 * to license@zend.com so we can send you a copy immediately.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 * @version    $Id: Token.php 20096 2010-01-06 02:05:09Z bkarwin $
 */


/**
 * A single token produced by text analysis: the term text together with the
 * start/end offsets it was given and a position increment relative to the
 * preceding token.
 *
 * @category   Zend
 * @package    Zend_Search_Lucene
 * @subpackage Analysis
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
 */
class Zend_Search_Lucene_Analysis_Token
{
    /**
     * Term text carried by this token.
     *
     * @var string
     */
    private $_termText;

    /**
     * Offset in the source text at which the token begins.
     *
     * @var integer
     */
    private $_startOffset;

    /**
     * Offset in the source text at which the token ends.
     *
     * @var integer
     */
    private $_endOffset;

    /**
     * Distance, in positions, between this token and the one before it.
     *
     * The constructor initialises it to one.
     *
     * Typical uses:
     *
     *  - Zero places several terms at the same position, e.g. when a word has
     *    more than one stem, so a phrase search containing either stem
     *    matches. Only the first stem keeps an increment of one; the others
     *    get zero. Repeating a token with a zero increment can also be used
     *    to boost the score of matches on that token.
     *
     *  - A value above one inhibits exact phrase matches. A stop word filter,
     *    for instance, may drop stop words and set the increment of each
     *    following non-stop word to the number of words dropped, so that
     *    exact phrase queries only match when no stop words intervened.
     *
     * @var integer
     */
    private $_positionIncrement;


    /**
     * Creates a token from its text and source offsets.
     *
     * The position increment always starts at one; change it afterwards with
     * setPositionIncrement().
     *
     * @param string  $text  term text
     * @param integer $start start offset in the source text
     * @param integer $end   end offset in the source text
     */
    public function __construct($text, $start, $end)
    {
        $this->_positionIncrement = 1;

        $this->_endOffset   = $end;
        $this->_startOffset = $start;
        $this->_termText    = $text;
    }


    /**
     * Gets the term text of this token.
     *
     * @return string
     */
    public function getTermText()
    {
        return $this->_termText;
    }

    /**
     * Gets the start offset: the position in the source text of the first
     * character belonging to this token.
     *
     * Note: getEndOffset() minus getStartOffset() is not guaranteed to equal
     * strlen() of getTermText(), because a stemmer or another filter may have
     * rewritten the term text.
     *
     * @return integer
     */
    public function getStartOffset()
    {
        return $this->_startOffset;
    }

    /**
     * Gets the end offset: one past the position in the source text of the
     * last character belonging to this token.
     *
     * @return integer
     */
    public function getEndOffset()
    {
        return $this->_endOffset;
    }

    /**
     * Gets the position increment of this token.
     *
     * @return integer
     */
    public function getPositionIncrement()
    {
        return $this->_positionIncrement;
    }

    /**
     * Replaces the position increment of this token.
     *
     * The value is stored as given; no validation is performed.
     *
     * @param integer $positionIncrement
     */
    public function setPositionIncrement($positionIncrement)
    {
        $this->_positionIncrement = $positionIncrement;
    }
}