| author | Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com> |
| Tue, 15 Mar 2011 16:32:22 +0100 | |
| changeset 68 | ecaf28ffe26e |
| parent 0 | web/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php@4eba9c11703f |
| parent 64 | web/Zend/Search/Lucene/Analysis/Analyzer/Common/Text.php@162c1de6545a |
| child 207 | 621fa6caec0c |
| permissions | -rw-r--r-- |
|
0
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
1 |
<?php |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
2 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
3 |
* Zend Framework |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
4 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
5 |
* LICENSE |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
6 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
7 |
* This source file is subject to the new BSD license that is bundled |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
8 |
* with this package in the file LICENSE.txt. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
9 |
* It is also available through the world-wide-web at this URL: |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
10 |
* http://framework.zend.com/license/new-bsd |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
11 |
* If you did not receive a copy of the license and are unable to |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
12 |
* obtain it through the world-wide-web, please send an email |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
13 |
* to license@zend.com so we can send you a copy immediately. |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
14 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
15 |
* @category Zend |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
16 |
* @package Zend_Search_Lucene |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
17 |
* @subpackage Analysis |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
18 |
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
19 |
* @license http://framework.zend.com/license/new-bsd New BSD License |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
20 |
* @version $Id: Text.php 20096 2010-01-06 02:05:09Z bkarwin $ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
21 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
22 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
23 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
24 |
/** Zend_Search_Lucene_Analysis_Analyzer_Common */ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
25 |
require_once 'Zend/Search/Lucene/Analysis/Analyzer/Common.php'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
26 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
27 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
28 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
29 |
* @category Zend |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
30 |
* @package Zend_Search_Lucene |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
31 |
* @subpackage Analysis |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
32 |
* @copyright Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
33 |
* @license http://framework.zend.com/license/new-bsd New BSD License |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
34 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
35 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
36 |
class Zend_Search_Lucene_Analysis_Analyzer_Common_Text extends Zend_Search_Lucene_Analysis_Analyzer_Common |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
37 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
38 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
39 |
* Current position in a stream |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
40 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
41 |
* @var integer |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
42 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
43 |
private $_position; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
44 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
45 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
46 |
* Reset token stream |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
47 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
48 |
public function reset() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
49 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
50 |
$this->_position = 0; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
51 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
52 |
if ($this->_input === null) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
53 |
return; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
54 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
55 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
56 |
// convert input into ascii |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
57 |
if (PHP_OS != 'AIX') { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
58 |
$this->_input = iconv($this->_encoding, 'ASCII//TRANSLIT', $this->_input); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
59 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
60 |
$this->_encoding = 'ASCII'; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
61 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
62 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
63 |
/** |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
64 |
* Tokenization stream API |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
65 |
* Get next token |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
66 |
* Returns null at the end of stream |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
67 |
* |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
68 |
* @return Zend_Search_Lucene_Analysis_Token|null |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
69 |
*/ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
70 |
public function nextToken() |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
71 |
{ |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
72 |
if ($this->_input === null) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
73 |
return null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
74 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
75 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
76 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
77 |
do { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
78 |
if (! preg_match('/[a-zA-Z]+/', $this->_input, $match, PREG_OFFSET_CAPTURE, $this->_position)) { |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
79 |
// It covers both cases a) there are no matches (preg_match(...) === 0) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
80 |
// b) error occured (preg_match(...) === FALSE) |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
81 |
return null; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
82 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
83 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
84 |
$str = $match[0][0]; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
85 |
$pos = $match[0][1]; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
86 |
$endpos = $pos + strlen($str); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
87 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
88 |
$this->_position = $endpos; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
89 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
90 |
$token = $this->normalize(new Zend_Search_Lucene_Analysis_Token($str, $pos, $endpos)); |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
91 |
} while ($token === null); // try again if token is skipped |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
92 |
|
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
93 |
return $token; |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
94 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
95 |
} |
|
4eba9c11703f
first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
96 |