web/enmi/Zend/Search/Lucene/Analysis/Token.php
changeset 19 1c2f13fd785c
parent 0 4eba9c11703f
equal deleted inserted replaced
18:bd595ad770fc 19:1c2f13fd785c
       
     1 <?php
       
     2 /**
       
     3  * Zend Framework
       
     4  *
       
     5  * LICENSE
       
     6  *
       
     7  * This source file is subject to the new BSD license that is bundled
       
     8  * with this package in the file LICENSE.txt.
       
     9  * It is also available through the world-wide-web at this URL:
       
    10  * http://framework.zend.com/license/new-bsd
       
    11  * If you did not receive a copy of the license and are unable to
       
    12  * obtain it through the world-wide-web, please send an email
       
    13  * to license@zend.com so we can send you a copy immediately.
       
    14  *
       
    15  * @category   Zend
       
    16  * @package    Zend_Search_Lucene
       
    17  * @subpackage Analysis
       
    18  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    19  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    20  * @version    $Id: Token.php 20096 2010-01-06 02:05:09Z bkarwin $
       
    21  */
       
    22 
       
    23 
       
    24 /**
       
    25  * @category   Zend
       
    26  * @package    Zend_Search_Lucene
       
    27  * @subpackage Analysis
       
    28  * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
       
    29  * @license    http://framework.zend.com/license/new-bsd     New BSD License
       
    30  */
       
    31 class Zend_Search_Lucene_Analysis_Token
       
    32 {
       
    33     /**
       
    34      * The text of the term.
       
    35      *
       
    36      * @var string
       
    37      */
       
    38     private $_termText;
       
    39 
       
    40     /**
       
    41      * Start in source text.
       
    42      *
       
    43      * @var integer
       
    44      */
       
    45     private $_startOffset;
       
    46 
       
    47     /**
       
    48      * End in source text
       
    49      *
       
    50      * @var integer
       
    51      */
       
    52     private $_endOffset;
       
    53 
       
    54     /**
       
    55      * The position of this token relative to the previous Token.
       
    56      *
       
    57      * The default value is one.
       
    58      *
       
    59      * Some common uses for this are:
       
    60      * Set it to zero to put multiple terms in the same position.  This is
       
    61      * useful if, e.g., a word has multiple stems.  Searches for phrases
       
    62      * including either stem will match.  In this case, all but the first stem's
       
    63      * increment should be set to zero: the increment of the first instance
       
    64      * should be one.  Repeating a token with an increment of zero can also be
       
    65      * used to boost the scores of matches on that token.
       
    66      *
       
    67      * Set it to values greater than one to inhibit exact phrase matches.
       
    68      * If, for example, one does not want phrases to match across removed stop
       
    69      * words, then one could build a stop word filter that removes stop words and
       
    70      * also sets the increment to the number of stop words removed before each
       
    71      * non-stop word.  Then exact phrase queries will only match when the terms
       
    72      * occur with no intervening stop words.
       
    73      *
       
    74      * @var integer
       
    75      */
       
    76     private $_positionIncrement;
       
    77 
       
    78 
       
    79     /**
       
    80      * Object constructor
       
    81      *
       
    82      * @param string  $text
       
    83      * @param integer $start
       
    84      * @param integer $end
       
    85      * @param string  $type
       
    86      */
       
    87     public function __construct($text, $start, $end)
       
    88     {
       
    89         $this->_termText    = $text;
       
    90         $this->_startOffset = $start;
       
    91         $this->_endOffset   = $end;
       
    92 
       
    93         $this->_positionIncrement = 1;
       
    94     }
       
    95 
       
    96 
       
    97     /**
       
    98      * positionIncrement setter
       
    99      *
       
   100      * @param integer $positionIncrement
       
   101      */
       
   102     public function setPositionIncrement($positionIncrement)
       
   103     {
       
   104         $this->_positionIncrement = $positionIncrement;
       
   105     }
       
   106 
       
   107     /**
       
   108      * Returns the position increment of this Token.
       
   109      *
       
   110      * @return integer
       
   111      */
       
   112     public function getPositionIncrement()
       
   113     {
       
   114         return $this->_positionIncrement;
       
   115     }
       
   116 
       
   117     /**
       
   118      * Returns the Token's term text.
       
   119      *
       
   120      * @return string
       
   121      */
       
   122     public function getTermText()
       
   123     {
       
   124         return $this->_termText;
       
   125     }
       
   126 
       
   127     /**
       
   128      * Returns this Token's starting offset, the position of the first character
       
   129      * corresponding to this token in the source text.
       
   130      *
       
   131      * Note:
       
   132      * The difference between getEndOffset() and getStartOffset() may not be equal
       
   133      * to strlen(Zend_Search_Lucene_Analysis_Token::getTermText()), as the term text may have been altered
       
   134      * by a stemmer or some other filter.
       
   135      *
       
   136      * @return integer
       
   137      */
       
   138     public function getStartOffset()
       
   139     {
       
   140         return $this->_startOffset;
       
   141     }
       
   142 
       
   143     /**
       
   144      * Returns this Token's ending offset, one greater than the position of the
       
   145      * last character corresponding to this token in the source text.
       
   146      *
       
   147      * @return integer
       
   148      */
       
   149     public function getEndOffset()
       
   150     {
       
   151         return $this->_endOffset;
       
   152     }
       
   153 }
       
   154