web/lib/Zend/Search/Lucene/Search/Query/Phrase.php
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 01 Jul 2011 18:59:13 +0200
changeset 207 621fa6caec0c
parent 0 web/Zend/Search/Lucene/Search/Query/Phrase.php@4eba9c11703f
parent 68 web/Zend/Search/Lucene/Search/Query/Phrase.php@ecaf28ffe26e
child 807 877f952ae2bd
permissions -rw-r--r--
Merge with f093196961e770387cc7cd3e11f2b7c0881b003b
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     1
<?php
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     2
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     3
 * Zend Framework
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     4
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     5
 * LICENSE
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     6
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
 * This source file is subject to the new BSD license that is bundled
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
 * with this package in the file LICENSE.txt.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     9
 * It is also available through the world-wide-web at this URL:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    10
 * http://framework.zend.com/license/new-bsd
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    11
 * If you did not receive a copy of the license and are unable to
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    12
 * obtain it through the world-wide-web, please send an email
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    13
 * to license@zend.com so we can send you a copy immediately.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    14
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    15
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    16
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    17
 * @subpackage Search
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    18
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    19
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    20
 * @version    $Id: Phrase.php 20096 2010-01-06 02:05:09Z bkarwin $
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    21
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    22
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    23
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    24
/** Zend_Search_Lucene_Search_Query */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    25
require_once 'Zend/Search/Lucene/Search/Query.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    26
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    29
 * A Query that matches documents containing a particular sequence of terms.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    30
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    31
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
 * @subpackage Search
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    35
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    36
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    37
class Zend_Search_Lucene_Search_Query_Phrase extends Zend_Search_Lucene_Search_Query
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    38
{
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    39
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    40
     * Terms to find.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    41
     * Array of Zend_Search_Lucene_Index_Term objects.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    42
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    43
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    44
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    45
    private $_terms;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    46
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    47
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    48
     * Term positions (relative positions of terms within the phrase).
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    49
     * Array of integers
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    50
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    51
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    52
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    53
    private $_offsets;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    54
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    55
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    56
     * Sets the number of other words permitted between words in query phrase.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    57
     * If zero, then this is an exact phrase search.  For larger values this works
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
     * like a WITHIN or NEAR operator.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    60
     * The slop is in fact an edit-distance, where the units correspond to
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    61
     * moves of terms in the query phrase out of position.  For example, to switch
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
     * the order of two words requires two moves (the first move places the words
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
     * atop one another), so to permit re-orderings of phrases, the slop must be
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
     * at least two.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
     * More exact matches are scored higher than sloppier matches, thus search
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
     * results are sorted by exactness.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
     * The slop is zero by default, requiring exact matches.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
    private $_slop;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    75
     * Result vector.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    76
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    77
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    78
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    79
    private $_resVector = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    80
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    81
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
     * Terms positions vectors.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
     * Array of Arrays:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
     * term1Id => (docId => array( pos1, pos2, ... ), ...)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
     * term2Id => (docId => array( pos1, pos2, ... ), ...)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    88
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    89
    private $_termsPositions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    91
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
     * Class constructor.  Create a new prase query.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    94
     * @param string $field    Field to search.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
     * @param array  $terms    Terms to search Array of strings.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
     * @param array  $offsets  Relative term positions. Array of integers.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
    public function __construct($terms = null, $offsets = null, $field = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
        $this->_slop = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
        if (is_array($terms)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
            $this->_terms = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   105
            require_once 'Zend/Search/Lucene/Index/Term.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   106
            foreach ($terms as $termId => $termText) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   107
                $this->_terms[$termId] = ($field !== null)? new Zend_Search_Lucene_Index_Term($termText, $field):
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   108
                                                            new Zend_Search_Lucene_Index_Term($termText);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   109
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   110
        } else if ($terms === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   111
            $this->_terms = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   112
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   113
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   114
            throw new Zend_Search_Lucene_Exception('terms argument must be array of strings or null');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   115
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   116
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   117
        if (is_array($offsets)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   118
            if (count($this->_terms) != count($offsets)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   119
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   120
                throw new Zend_Search_Lucene_Exception('terms and offsets arguments must have the same size.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   121
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   122
            $this->_offsets = $offsets;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   123
        } else if ($offsets === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   124
            $this->_offsets = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   125
            foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   126
                $position = count($this->_offsets);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   127
                $this->_offsets[$termId] = $position;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   128
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   129
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   130
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   131
            throw new Zend_Search_Lucene_Exception('offsets argument must be array of strings or null');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   132
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   133
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   134
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   135
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   136
     * Set slop
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   137
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   138
     * @param integer $slop
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   139
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   140
    public function setSlop($slop)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   141
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   142
        $this->_slop = $slop;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   143
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   144
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   145
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   146
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   147
     * Get slop
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   148
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   149
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   150
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   151
    public function getSlop()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   152
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   153
        return $this->_slop;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   154
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   155
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   156
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   157
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   158
     * Adds a term to the end of the query phrase.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   159
     * The relative position of the term is specified explicitly or the one immediately
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   160
     * after the last term added.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   161
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   162
     * @param Zend_Search_Lucene_Index_Term $term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   163
     * @param integer $position
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   164
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   165
    public function addTerm(Zend_Search_Lucene_Index_Term $term, $position = null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   166
        if ((count($this->_terms) != 0)&&(end($this->_terms)->field != $term->field)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   167
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   168
            throw new Zend_Search_Lucene_Exception('All phrase terms must be in the same field: ' .
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   169
                                                   $term->field . ':' . $term->text);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   170
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   171
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   172
        $this->_terms[] = $term;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   173
        if ($position !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   174
            $this->_offsets[] = $position;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   175
        } else if (count($this->_offsets) != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   176
            $this->_offsets[] = end($this->_offsets) + 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   177
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   178
            $this->_offsets[] = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   179
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   180
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   181
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   182
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   183
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   184
     * Re-write query into primitive queries in the context of specified index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   185
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   186
     * @param Zend_Search_Lucene_Interface $index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   187
     * @return Zend_Search_Lucene_Search_Query
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   188
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   189
    public function rewrite(Zend_Search_Lucene_Interface $index)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   190
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   191
        if (count($this->_terms) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   192
            require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   193
            return new Zend_Search_Lucene_Search_Query_Empty();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   194
        } else if ($this->_terms[0]->field !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   195
            return $this;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   196
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   197
            require_once 'Zend/Search/Lucene/Search/Query/Boolean.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   198
            $query = new Zend_Search_Lucene_Search_Query_Boolean();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   199
            $query->setBoost($this->getBoost());
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   200
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   201
            foreach ($index->getFieldNames(true) as $fieldName) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   202
                $subquery = new Zend_Search_Lucene_Search_Query_Phrase();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   203
                $subquery->setSlop($this->getSlop());
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   204
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   205
                require_once 'Zend/Search/Lucene/Index/Term.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   206
                foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   207
                    $qualifiedTerm = new Zend_Search_Lucene_Index_Term($term->text, $fieldName);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   208
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   209
                    $subquery->addTerm($qualifiedTerm, $this->_offsets[$termId]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   210
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   211
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   212
                $query->addSubquery($subquery);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   213
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   214
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   215
            return $query;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   216
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   217
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   218
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   219
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   220
     * Optimize query in the context of specified index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   221
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   222
     * @param Zend_Search_Lucene_Interface $index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   223
     * @return Zend_Search_Lucene_Search_Query
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   224
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   225
    public function optimize(Zend_Search_Lucene_Interface $index)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   226
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   227
        // Check, that index contains all phrase terms
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   228
        foreach ($this->_terms as $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   229
            if (!$index->hasTerm($term)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   230
                require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   231
                return new Zend_Search_Lucene_Search_Query_Empty();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   232
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   233
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   234
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   235
        if (count($this->_terms) == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   236
            // It's one term query
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   237
            require_once 'Zend/Search/Lucene/Search/Query/Term.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   238
            $optimizedQuery = new Zend_Search_Lucene_Search_Query_Term(reset($this->_terms));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   239
            $optimizedQuery->setBoost($this->getBoost());
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   240
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   241
            return $optimizedQuery;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   242
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   243
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   244
        if (count($this->_terms) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   245
            require_once 'Zend/Search/Lucene/Search/Query/Empty.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   246
            return new Zend_Search_Lucene_Search_Query_Empty();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   247
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   248
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   249
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   250
        return $this;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   251
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   252
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   253
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   254
     * Returns query term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   255
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   256
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   257
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   258
    public function getTerms()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   259
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   260
        return $this->_terms;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   261
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   262
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   263
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   264
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   265
     * Set weight for specified term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   266
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   267
     * @param integer $num
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   268
     * @param Zend_Search_Lucene_Search_Weight_Term $weight
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   269
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   270
    public function setWeight($num, $weight)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   271
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   272
        $this->_weights[$num] = $weight;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   273
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   274
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   275
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   276
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   277
     * Constructs an appropriate Weight implementation for this query.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   278
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   279
     * @param Zend_Search_Lucene_Interface $reader
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   280
     * @return Zend_Search_Lucene_Search_Weight
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   281
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   282
    public function createWeight(Zend_Search_Lucene_Interface $reader)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   283
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   284
        require_once 'Zend/Search/Lucene/Search/Weight/Phrase.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   285
        $this->_weight = new Zend_Search_Lucene_Search_Weight_Phrase($this, $reader);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   286
        return $this->_weight;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   287
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   288
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   289
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   290
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   291
     * Score calculator for exact phrase queries (terms sequence is fixed)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   292
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   293
     * @param integer $docId
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   294
     * @return float
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   295
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   296
    public function _exactPhraseFreq($docId)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   297
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   298
        $freq = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   299
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   300
        // Term Id with lowest cardinality
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   301
        $lowCardTermId = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   302
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   303
        // Calculate $lowCardTermId
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   304
        foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   305
            if ($lowCardTermId === null ||
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   306
                count($this->_termsPositions[$termId][$docId]) <
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   307
                count($this->_termsPositions[$lowCardTermId][$docId]) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   308
                    $lowCardTermId = $termId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   309
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   310
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   311
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   312
        // Walk through positions of the term with lowest cardinality
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   313
        foreach ($this->_termsPositions[$lowCardTermId][$docId] as $lowCardPos) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   314
            // We expect phrase to be found
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   315
            $freq++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   316
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   317
            // Walk through other terms
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   318
            foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   319
                if ($termId != $lowCardTermId) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   320
                    $expectedPosition = $lowCardPos +
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   321
                                            ($this->_offsets[$termId] -
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   322
                                             $this->_offsets[$lowCardTermId]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   323
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   324
                    if (!in_array($expectedPosition, $this->_termsPositions[$termId][$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   325
                        $freq--;  // Phrase wasn't found.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   326
                        break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   327
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   328
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   329
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   330
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   331
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   332
        return $freq;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   333
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   334
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   335
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   336
     * Score calculator for sloppy phrase queries (terms sequence is fixed)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   337
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   338
     * @param integer $docId
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   339
     * @param Zend_Search_Lucene_Interface $reader
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   340
     * @return float
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   341
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   342
    public function _sloppyPhraseFreq($docId, Zend_Search_Lucene_Interface $reader)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   343
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   344
        $freq = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   345
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   346
        $phraseQueue = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   347
        $phraseQueue[0] = array(); // empty phrase
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   348
        $lastTerm = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   349
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   350
        // Walk through the terms to create phrases.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   351
        foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   352
            $queueSize = count($phraseQueue);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   353
            $firstPass = true;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   354
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   355
            // Walk through the term positions.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   356
            // Each term position produces a set of phrases.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   357
            foreach ($this->_termsPositions[$termId][$docId] as $termPosition ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   358
                if ($firstPass) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   359
                    for ($count = 0; $count < $queueSize; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   360
                        $phraseQueue[$count][$termId] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   361
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   362
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   363
                    for ($count = 0; $count < $queueSize; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   364
                        if ($lastTerm !== null &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   365
                            abs( $termPosition - $phraseQueue[$count][$lastTerm] -
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   366
                                 ($this->_offsets[$termId] - $this->_offsets[$lastTerm])) > $this->_slop) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   367
                            continue;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   368
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   369
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   370
                        $newPhraseId = count($phraseQueue);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   371
                        $phraseQueue[$newPhraseId]          = $phraseQueue[$count];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   372
                        $phraseQueue[$newPhraseId][$termId] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   373
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   374
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   375
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   376
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   377
                $firstPass = false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   378
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   379
            $lastTerm = $termId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   380
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   381
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   382
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   383
        foreach ($phraseQueue as $phrasePos) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   384
            $minDistance = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   385
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   386
            for ($shift = -$this->_slop; $shift <= $this->_slop; $shift++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   387
                $distance = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   388
                $start = reset($phrasePos) - reset($this->_offsets) + $shift;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   389
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   390
                foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   391
                    $distance += abs($phrasePos[$termId] - $this->_offsets[$termId] - $start);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   392
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   393
                    if($distance > $this->_slop) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   394
                        break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   395
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   396
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   397
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   398
                if ($minDistance === null || $distance < $minDistance) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   399
                    $minDistance = $distance;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   400
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   401
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   402
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   403
            if ($minDistance <= $this->_slop) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   404
                $freq += $reader->getSimilarity()->sloppyFreq($minDistance);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   405
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   406
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   407
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   408
        return $freq;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   409
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   410
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   411
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   412
     * Execute query in context of index reader
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   413
     * It also initializes necessary internal structures
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   414
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   415
     * @param Zend_Search_Lucene_Interface $reader
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   416
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   417
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   418
    public function execute(Zend_Search_Lucene_Interface $reader, $docsFilter = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   419
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   420
        $this->_resVector = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   421
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   422
        if (count($this->_terms) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   423
            $this->_resVector = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   424
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   425
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   426
        $resVectors      = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   427
        $resVectorsSizes = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   428
        $resVectorsIds   = array(); // is used to prevent arrays comparison
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   429
        foreach ($this->_terms as $termId => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   430
            $resVectors[]      = array_flip($reader->termDocs($term));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   431
            $resVectorsSizes[] = count(end($resVectors));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   432
            $resVectorsIds[]   = $termId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   433
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   434
            $this->_termsPositions[$termId] = $reader->termPositions($term);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   435
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   436
        // sort resvectors in order of subquery cardinality increasing
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   437
        array_multisort($resVectorsSizes, SORT_ASC, SORT_NUMERIC,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   438
                        $resVectorsIds,   SORT_ASC, SORT_NUMERIC,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   439
                        $resVectors);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   440
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   441
        foreach ($resVectors as $nextResVector) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   442
            if($this->_resVector === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   443
                $this->_resVector = $nextResVector;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   444
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   445
                //$this->_resVector = array_intersect_key($this->_resVector, $nextResVector);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   446
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   447
                /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   448
                 * This code is used as workaround for array_intersect_key() slowness problem.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   449
                 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   450
                $updatedVector = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   451
                foreach ($this->_resVector as $id => $value) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   452
                    if (isset($nextResVector[$id])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   453
                        $updatedVector[$id] = $value;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   454
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   455
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   456
                $this->_resVector = $updatedVector;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   457
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   458
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   459
            if (count($this->_resVector) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   460
                // Empty result set, we don't need to check other terms
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   461
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   462
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   463
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   464
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   465
        // ksort($this->_resVector, SORT_NUMERIC);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   466
        // Docs are returned ordered. Used algorithm doesn't change elements order.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   467
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   468
        // Initialize weight if it's not done yet
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   469
        $this->_initWeight($reader);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   470
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   471
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   472
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   473
     * Get document ids likely matching the query
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   474
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   475
     * It's an array with document ids as keys (performance considerations)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   476
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   477
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   478
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   479
    public function matchedDocs()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   480
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   481
        return $this->_resVector;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   482
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   483
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   484
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   485
     * Score specified document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   486
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   487
     * @param integer $docId
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   488
     * @param Zend_Search_Lucene_Interface $reader
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   489
     * @return float
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   490
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   491
    public function score($docId, Zend_Search_Lucene_Interface $reader)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   492
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   493
        if (isset($this->_resVector[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   494
            if ($this->_slop == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   495
                $freq = $this->_exactPhraseFreq($docId);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   496
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   497
                $freq = $this->_sloppyPhraseFreq($docId, $reader);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   498
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   499
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   500
            if ($freq != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   501
                $tf = $reader->getSimilarity()->tf($freq);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   502
                $weight = $this->_weight->getValue();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   503
                $norm = $reader->norm($docId, reset($this->_terms)->field);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   504
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   505
                return $tf * $weight * $norm * $this->getBoost();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   506
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   507
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   508
            // Included in result, but culculated freq is zero
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   509
            return 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   510
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   511
            return 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   512
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   513
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   514
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   515
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   516
     * Return query terms
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   517
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   518
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   519
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   520
    public function getQueryTerms()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   521
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   522
        return $this->_terms;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   523
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   524
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   525
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   526
     * Query specific matches highlighting
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   527
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   528
     * @param Zend_Search_Lucene_Search_Highlighter_Interface $highlighter  Highlighter object (also contains doc for highlighting)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   529
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   530
    protected function _highlightMatches(Zend_Search_Lucene_Search_Highlighter_Interface $highlighter)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   531
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   532
        $words = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   533
        foreach ($this->_terms as $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   534
            $words[] = $term->text;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   535
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   536
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   537
        $highlighter->highlight($words);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   538
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   539
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   540
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   541
     * Print a query
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   542
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   543
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   544
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   545
    public function __toString()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   546
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   547
        // It's used only for query visualisation, so we don't care about characters escaping
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   548
        if (isset($this->_terms[0]) && $this->_terms[0]->field !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   549
            $query = $this->_terms[0]->field . ':';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   550
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   551
            $query = '';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   552
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   553
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   554
        $query .= '"';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   555
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   556
        foreach ($this->_terms as $id => $term) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   557
            if ($id != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   558
                $query .= ' ';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   559
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   560
            $query .= $term->text;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   561
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   562
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   563
        $query .= '"';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   564
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   565
        if ($this->_slop != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   566
            $query .= '~' . $this->_slop;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   567
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   568
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   569
        if ($this->getBoost() != 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   570
            $query .= '^' . round($this->getBoost(), 4);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   571
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   572
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   573
        return $query;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   574
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   575
}
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   576