web/lib/Zend/Search/Lucene/Index/SegmentInfo.php
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Tue, 15 Mar 2011 16:32:22 +0100
changeset 68 ecaf28ffe26e
parent 0 web/Zend/Search/Lucene/Index/SegmentInfo.php@4eba9c11703f
parent 64 web/Zend/Search/Lucene/Index/SegmentInfo.php@162c1de6545a
child 207 621fa6caec0c
permissions -rw-r--r--
Merge with cc1eea280cdb9d27ecdc9a2898de7a2b9835cde7
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     1
<?php
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     2
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     3
 * Zend Framework
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     4
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     5
 * LICENSE
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     6
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
 * This source file is subject to the new BSD license that is bundled
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
 * with this package in the file LICENSE.txt.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     9
 * It is also available through the world-wide-web at this URL:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    10
 * http://framework.zend.com/license/new-bsd
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    11
 * If you did not receive a copy of the license and are unable to
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    12
 * obtain it through the world-wide-web, please send an email
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    13
 * to license@zend.com so we can send you a copy immediately.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    14
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    15
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    16
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    17
 * @subpackage Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    18
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    19
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    20
 * @version    $Id: SegmentInfo.php 22987 2010-09-21 10:39:53Z alexander $
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    21
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    22
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    23
/** Zend_Search_Lucene_Index_TermsStream_Interface */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    24
require_once 'Zend/Search/Lucene/Index/TermsStream/Interface.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    25
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    26
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
/** Zend_Search_Lucene_Search_Similarity */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
require_once 'Zend/Search/Lucene/Search/Similarity.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    29
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    30
/** Zend_Search_Lucene_Index_FieldInfo */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    31
require_once 'Zend/Search/Lucene/Index/FieldInfo.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
/** Zend_Search_Lucene_Index_Term */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
require_once 'Zend/Search/Lucene/Index/Term.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    35
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    36
/** Zend_Search_Lucene_Index_TermInfo */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    37
require_once 'Zend/Search/Lucene/Index/TermInfo.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    38
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    39
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    40
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    41
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    42
 * @subpackage Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    43
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    44
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    45
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    46
class Zend_Search_Lucene_Index_SegmentInfo implements Zend_Search_Lucene_Index_TermsStream_Interface
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    47
{
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    48
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    49
     * "Full scan vs fetch" boundary.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    50
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    51
     * If filter selectivity is less than this value, then full scan is performed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    52
     * (since term entries fetching has some additional overhead).
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    53
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    54
    const FULL_SCAN_VS_FETCH_BOUNDARY = 5;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    55
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    56
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    57
     * Number of docs in a segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    60
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    61
    private $_docCount;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
     * Segment name
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
     * @var string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
    private $_name;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
     * Term Dictionary Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
     * Array of arrays (Zend_Search_Lucene_Index_Term objects are represented as arrays because
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
     * of performance considerations)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    75
     * [0] -> $termValue
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    76
     * [1] -> $termFieldNum
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    77
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    78
     * Corresponding Zend_Search_Lucene_Index_TermInfo object stored in the $_termDictionaryInfos
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    79
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    80
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    81
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
    private $_termDictionary;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
     * Term Dictionary Index TermInfos
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
     * Array of arrays (Zend_Search_Lucene_Index_TermInfo objects are represented as arrays because
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    88
     * of performance considerations)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    89
     * [0] -> $docFreq
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
     * [1] -> $freqPointer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    91
     * [2] -> $proxPointer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
     * [3] -> $skipOffset
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
     * [4] -> $indexPointer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    94
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
    private $_termDictionaryInfos;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
     * Segment fields. Array of Zend_Search_Lucene_Index_FieldInfo objects for this segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
    private $_fields;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   105
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   106
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   107
     * Field positions in a dictionary.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   108
     * (Term dictionary contains filelds ordered by names)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   109
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   110
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   111
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   112
    private $_fieldsDicPositions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   113
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   114
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   115
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   116
     * Associative array where the key is the file name and the value is data offset
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   117
     * in a compound segment file (.csf).
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   118
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   119
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   120
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   121
    private $_segFiles;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   122
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   123
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   124
     * Associative array where the key is the file name and the value is file size (.csf).
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   125
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   126
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   127
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   128
    private $_segFileSizes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   129
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   130
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   131
     * Delete file generation number
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   132
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   133
     * -2 means autodetect latest delete generation
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   134
     * -1 means 'there is no delete file'
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   135
     *  0 means pre-2.1 format delete file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   136
     *  X specifies used delete file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   137
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   138
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   139
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   140
    private $_delGen;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   141
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   142
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   143
     * Segment has single norms file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   144
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   145
     * If true then one .nrm file is used for all fields
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   146
     * Otherwise .fN files are used
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   147
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   148
     * @var boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   149
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   150
    private $_hasSingleNormFile;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   151
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   152
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   153
     * Use compound segment file (*.cfs) to collect all other segment files
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   154
     * (excluding .del files)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   155
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   156
     * @var boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   157
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   158
    private $_isCompound;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   159
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   160
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   161
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   162
     * File system adapter.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   163
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   164
     * @var Zend_Search_Lucene_Storage_Directory_Filesystem
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   165
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   166
    private $_directory;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   167
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   168
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   169
     * Normalization factors.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   170
     * An array fieldName => normVector
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   171
     * normVector is a binary string.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   172
     * Each byte corresponds to an indexed document in a segment and
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   173
     * encodes normalization factor (float value, encoded by
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   174
     * Zend_Search_Lucene_Search_Similarity::encodeNorm())
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   175
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   176
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   177
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   178
    private $_norms = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   179
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   180
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   181
     * List of deleted documents.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   182
     * bitset if bitset extension is loaded or array otherwise.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   183
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   184
     * @var mixed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   185
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   186
    private $_deleted = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   187
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   188
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   189
     * $this->_deleted update flag
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   190
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   191
     * @var boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   192
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   193
    private $_deletedDirty = false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   194
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   195
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   196
     * True if segment uses shared doc store
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   197
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   198
     * @var boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   199
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   200
    private $_usesSharedDocStore;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   201
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   202
    /*
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   203
     * Shared doc store options.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   204
     * It's an assotiative array with the following items:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   205
     * - 'offset'     => $docStoreOffset           The starting document in the shared doc store files where this segment's documents begin
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   206
     * - 'segment'    => $docStoreSegment          The name of the segment that has the shared doc store files.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   207
     * - 'isCompound' => $docStoreIsCompoundFile   True, if compound file format is used for the shared doc store files (.cfx file).
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   208
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   209
    private $_sharedDocStoreOptions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   210
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   211
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   212
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   213
     * Zend_Search_Lucene_Index_SegmentInfo constructor
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   214
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   215
     * @param Zend_Search_Lucene_Storage_Directory $directory
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   216
     * @param string     $name
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   217
     * @param integer    $docCount
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   218
     * @param integer    $delGen
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   219
     * @param array|null $docStoreOptions
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   220
     * @param boolean    $hasSingleNormFile
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   221
     * @param boolean    $isCompound
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   222
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   223
    public function __construct(Zend_Search_Lucene_Storage_Directory $directory, $name, $docCount, $delGen = 0, $docStoreOptions = null, $hasSingleNormFile = false, $isCompound = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   224
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   225
        $this->_directory = $directory;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   226
        $this->_name      = $name;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   227
        $this->_docCount  = $docCount;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   228
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   229
        if ($docStoreOptions !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   230
            $this->_usesSharedDocStore    = true;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   231
            $this->_sharedDocStoreOptions = $docStoreOptions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   232
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   233
            if ($docStoreOptions['isCompound']) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   234
                $cfxFile       = $this->_directory->getFileObject($docStoreOptions['segment'] . '.cfx');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   235
                $cfxFilesCount = $cfxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   236
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   237
                $cfxFiles     = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   238
                $cfxFileSizes = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   239
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   240
                for ($count = 0; $count < $cfxFilesCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   241
                    $dataOffset = $cfxFile->readLong();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   242
                    if ($count != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   243
                        $cfxFileSizes[$fileName] = $dataOffset - end($cfxFiles);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   244
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   245
                    $fileName            = $cfxFile->readString();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   246
                    $cfxFiles[$fileName] = $dataOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   247
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   248
                if ($count != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   249
                    $cfxFileSizes[$fileName] = $this->_directory->fileLength($docStoreOptions['segment'] . '.cfx') - $dataOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   250
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   251
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   252
                $this->_sharedDocStoreOptions['files']     = $cfxFiles;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   253
                $this->_sharedDocStoreOptions['fileSizes'] = $cfxFileSizes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   254
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   255
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   256
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   257
        $this->_hasSingleNormFile = $hasSingleNormFile;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   258
        $this->_delGen            = $delGen;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   259
        $this->_termDictionary    = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   260
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   261
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   262
        if ($isCompound !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   263
            $this->_isCompound    = $isCompound;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   264
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   265
            // It's a pre-2.1 segment or isCompound is set to 'unknown'
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   266
            // Detect if segment uses compound file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   267
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   268
            try {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   269
                // Try to open compound file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   270
                $this->_directory->getFileObject($name . '.cfs');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   271
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   272
                // Compound file is found
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   273
                $this->_isCompound = true;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   274
            } catch (Zend_Search_Lucene_Exception $e) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   275
                if (strpos($e->getMessage(), 'is not readable') !== false) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   276
                    // Compound file is not found or is not readable
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   277
                    $this->_isCompound = false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   278
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   279
                    throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   280
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   281
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   282
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   283
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   284
        $this->_segFiles = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   285
        if ($this->_isCompound) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   286
            $cfsFile = $this->_directory->getFileObject($name . '.cfs');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   287
            $segFilesCount = $cfsFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   288
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   289
            for ($count = 0; $count < $segFilesCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   290
                $dataOffset = $cfsFile->readLong();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   291
                if ($count != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   292
                    $this->_segFileSizes[$fileName] = $dataOffset - end($this->_segFiles);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   293
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   294
                $fileName = $cfsFile->readString();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   295
                $this->_segFiles[$fileName] = $dataOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   296
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   297
            if ($count != 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   298
                $this->_segFileSizes[$fileName] = $this->_directory->fileLength($name . '.cfs') - $dataOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   299
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   300
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   301
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   302
        $fnmFile = $this->openCompoundFile('.fnm');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   303
        $fieldsCount = $fnmFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   304
        $fieldNames = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   305
        $fieldNums  = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   306
        $this->_fields = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   307
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   308
        for ($count=0; $count < $fieldsCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   309
            $fieldName = $fnmFile->readString();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   310
            $fieldBits = $fnmFile->readByte();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   311
            $this->_fields[$count] = new Zend_Search_Lucene_Index_FieldInfo($fieldName,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   312
                                                                            $fieldBits & 0x01 /* field is indexed */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   313
                                                                            $count,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   314
                                                                            $fieldBits & 0x02 /* termvectors are stored */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   315
                                                                            $fieldBits & 0x10 /* norms are omitted */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   316
                                                                            $fieldBits & 0x20 /* payloads are stored */);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   317
            if ($fieldBits & 0x10) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   318
                // norms are omitted for the indexed field
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   319
                $this->_norms[$count] = str_repeat(chr(Zend_Search_Lucene_Search_Similarity::encodeNorm(1.0)), $docCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   320
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   321
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   322
            $fieldNums[$count]  = $count;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   323
            $fieldNames[$count] = $fieldName;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   324
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   325
        array_multisort($fieldNames, SORT_ASC, SORT_REGULAR, $fieldNums);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   326
        $this->_fieldsDicPositions = array_flip($fieldNums);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   327
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   328
        if ($this->_delGen == -2) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   329
            // SegmentInfo constructor is invoked from index writer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   330
            // Autodetect current delete file generation number
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   331
            $this->_delGen = $this->_detectLatestDelGen();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   332
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   333
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   334
        // Load deletions
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   335
        $this->_deleted = $this->_loadDelFile();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   336
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   337
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   338
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   339
     * Load detetions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   340
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   341
     * Returns bitset or an array depending on bitset extension availability
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   342
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   343
     * @return mixed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   344
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   345
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   346
    private function _loadDelFile()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   347
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   348
        if ($this->_delGen == -1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   349
            // There is no delete file for this segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   350
            return null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   351
        } else if ($this->_delGen == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   352
            // It's a segment with pre-2.1 format delete file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   353
            // Try to load deletions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   354
            return $this->_loadPre21DelFile();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   355
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   356
            // It's 2.1+ format deleteions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   357
            return $this->_load21DelFile();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   358
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   359
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   360
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   361
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   362
     * Load pre-2.1 detetions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   363
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   364
     * Returns bitset or an array depending on bitset extension availability
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   365
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   366
     * @return mixed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   367
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   368
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   369
    private function _loadPre21DelFile()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   370
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   371
        require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   372
        try {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   373
            // '.del' files always stored in a separate file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   374
            // Segment compound is not used
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   375
            $delFile = $this->_directory->getFileObject($this->_name . '.del');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   376
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   377
            $byteCount = $delFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   378
            $byteCount = ceil($byteCount/8);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   379
            $bitCount  = $delFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   380
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   381
            if ($bitCount == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   382
                $delBytes = '';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   383
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   384
                $delBytes = $delFile->readBytes($byteCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   385
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   386
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   387
            if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   388
                return $delBytes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   389
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   390
                $deletions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   391
                for ($count = 0; $count < $byteCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   392
                    $byte = ord($delBytes[$count]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   393
                    for ($bit = 0; $bit < 8; $bit++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   394
                        if ($byte & (1<<$bit)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   395
                            $deletions[$count*8 + $bit] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   396
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   397
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   398
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   399
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   400
                return $deletions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   401
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   402
        } catch(Zend_Search_Lucene_Exception $e) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   403
            if (strpos($e->getMessage(), 'is not readable') === false) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   404
                throw new Zend_Search_Lucene_Exception($e->getMessage(), $e->getCode(), $e);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   405
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   406
            // There is no deletion file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   407
            $this->_delGen = -1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   408
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   409
            return null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   410
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   411
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   412
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   413
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   414
     * Load 2.1+ format detetions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   415
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   416
     * Returns bitset or an array depending on bitset extension availability
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   417
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   418
     * @return mixed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   419
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   420
    private function _load21DelFile()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   421
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   422
        $delFile = $this->_directory->getFileObject($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   423
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   424
        $format = $delFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   425
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   426
        if ($format == (int)0xFFFFFFFF) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   427
            if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   428
                $deletions = bitset_empty();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   429
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   430
                $deletions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   431
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   432
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   433
            $byteCount = $delFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   434
            $bitCount  = $delFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   435
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   436
            $delFileSize = $this->_directory->fileLength($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   437
            $byteNum = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   438
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   439
            do {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   440
                $dgap = $delFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   441
                $nonZeroByte = $delFile->readByte();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   442
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   443
                $byteNum += $dgap;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   444
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   445
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   446
                if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   447
                    for ($bit = 0; $bit < 8; $bit++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   448
                        if ($nonZeroByte & (1<<$bit)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   449
                            bitset_incl($deletions, $byteNum*8 + $bit);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   450
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   451
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   452
                    return $deletions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   453
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   454
                    for ($bit = 0; $bit < 8; $bit++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   455
                        if ($nonZeroByte & (1<<$bit)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   456
                            $deletions[$byteNum*8 + $bit] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   457
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   458
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   459
                    return (count($deletions) > 0) ? $deletions : null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   460
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   461
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   462
            } while ($delFile->tell() < $delFileSize);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   463
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   464
            // $format is actually byte count
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   465
            $byteCount = ceil($format/8);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   466
            $bitCount  = $delFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   467
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   468
            if ($bitCount == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   469
                $delBytes = '';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   470
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   471
                $delBytes = $delFile->readBytes($byteCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   472
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   473
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   474
            if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   475
                return $delBytes;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   476
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   477
                $deletions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   478
                for ($count = 0; $count < $byteCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   479
                    $byte = ord($delBytes[$count]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   480
                    for ($bit = 0; $bit < 8; $bit++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   481
                        if ($byte & (1<<$bit)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   482
                            $deletions[$count*8 + $bit] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   483
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   484
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   485
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   486
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   487
                return (count($deletions) > 0) ? $deletions : null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   488
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   489
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   490
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   491
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   492
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   493
     * Opens index file stoted within compound index file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   494
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   495
     * @param string $extension
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   496
     * @param boolean $shareHandler
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   497
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   498
     * @return Zend_Search_Lucene_Storage_File
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   499
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   500
    public function openCompoundFile($extension, $shareHandler = true)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   501
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   502
        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   503
            $fdxFName = $this->_sharedDocStoreOptions['segment'] . '.fdx';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   504
            $fdtFName = $this->_sharedDocStoreOptions['segment'] . '.fdt';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   505
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   506
            if (!$this->_sharedDocStoreOptions['isCompound']) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   507
                $fdxFile = $this->_directory->getFileObject($fdxFName, $shareHandler);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   508
                $fdxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   509
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   510
                if ($extension == '.fdx') {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   511
                    // '.fdx' file is requested
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   512
                    return $fdxFile;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   513
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   514
                    // '.fdt' file is requested
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   515
                    $fdtStartOffset = $fdxFile->readLong();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   516
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   517
                    $fdtFile = $this->_directory->getFileObject($fdtFName, $shareHandler);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   518
                    $fdtFile->seek($fdtStartOffset, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   519
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   520
                    return $fdtFile;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   521
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   522
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   523
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   524
            if( !isset($this->_sharedDocStoreOptions['files'][$fdxFName]) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   525
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   526
                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   527
                                       . $fdxFName . ' file.' );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   528
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   529
            if( !isset($this->_sharedDocStoreOptions['files'][$fdtFName]) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   530
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   531
                throw new Zend_Search_Lucene_Exception('Shared doc storage segment compound file doesn\'t contain '
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   532
                                       . $fdtFName . ' file.' );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   533
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   534
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   535
            // Open shared docstore segment file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   536
            $cfxFile = $this->_directory->getFileObject($this->_sharedDocStoreOptions['segment'] . '.cfx', $shareHandler);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   537
            // Seek to the start of '.fdx' file within compound file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   538
            $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdxFName]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   539
            // Seek to the start of current segment documents section
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   540
            $cfxFile->seek($this->_sharedDocStoreOptions['offset']*8, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   541
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   542
            if ($extension == '.fdx') {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   543
                // '.fdx' file is requested
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   544
                return $cfxFile;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   545
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   546
                // '.fdt' file is requested
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   547
                $fdtStartOffset = $cfxFile->readLong();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   548
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   549
                // Seek to the start of '.fdt' file within compound file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   550
                $cfxFile->seek($this->_sharedDocStoreOptions['files'][$fdtFName]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   551
                // Seek to the start of current segment documents section
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   552
                $cfxFile->seek($fdtStartOffset, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   553
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   554
                return $fdtFile;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   555
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   556
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   557
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   558
        $filename = $this->_name . $extension;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   559
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   560
        if (!$this->_isCompound) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   561
            return $this->_directory->getFileObject($filename, $shareHandler);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   562
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   563
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   564
        if( !isset($this->_segFiles[$filename]) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   565
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   566
            throw new Zend_Search_Lucene_Exception('Segment compound file doesn\'t contain '
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   567
                                       . $filename . ' file.' );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   568
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   569
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   570
        $file = $this->_directory->getFileObject($this->_name . '.cfs', $shareHandler);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   571
        $file->seek($this->_segFiles[$filename]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   572
        return $file;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   573
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   574
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   575
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   576
     * Get compound file length
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   577
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   578
     * @param string $extension
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   579
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   580
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   581
    public function compoundFileLength($extension)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   582
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   583
        if (($extension == '.fdx'  || $extension == '.fdt')  &&  $this->_usesSharedDocStore) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   584
            $filename = $this->_sharedDocStoreOptions['segment'] . $extension;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   585
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   586
            if (!$this->_sharedDocStoreOptions['isCompound']) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   587
                return $this->_directory->fileLength($filename);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   588
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   589
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   590
            if( !isset($this->_sharedDocStoreOptions['fileSizes'][$filename]) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   591
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   592
                throw new Zend_Search_Lucene_Exception('Shared doc store compound file doesn\'t contain '
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   593
                                           . $filename . ' file.' );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   594
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   595
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   596
            return $this->_sharedDocStoreOptions['fileSizes'][$filename];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   597
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   598
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   599
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   600
        $filename = $this->_name . $extension;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   601
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   602
        // Try to get common file first
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   603
        if ($this->_directory->fileExists($filename)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   604
            return $this->_directory->fileLength($filename);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   605
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   606
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   607
        if( !isset($this->_segFileSizes[$filename]) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   608
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   609
            throw new Zend_Search_Lucene_Exception('Index compound file doesn\'t contain '
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   610
                                       . $filename . ' file.' );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   611
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   612
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   613
        return $this->_segFileSizes[$filename];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   614
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   615
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   616
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   617
     * Returns field index or -1 if field is not found
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   618
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   619
     * @param string $fieldName
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   620
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   621
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   622
    public function getFieldNum($fieldName)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   623
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   624
        foreach( $this->_fields as $field ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   625
            if( $field->name == $fieldName ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   626
                return $field->number;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   627
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   628
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   629
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   630
        return -1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   631
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   632
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   633
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   634
     * Returns field info for specified field
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   635
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   636
     * @param integer $fieldNum
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   637
     * @return Zend_Search_Lucene_Index_FieldInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   638
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   639
    public function getField($fieldNum)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   640
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   641
        return $this->_fields[$fieldNum];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   642
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   643
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   644
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   645
     * Returns array of fields.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   646
     * if $indexed parameter is true, then returns only indexed fields.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   647
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   648
     * @param boolean $indexed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   649
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   650
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   651
    public function getFields($indexed = false)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   652
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   653
        $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   654
        foreach( $this->_fields as $field ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   655
            if( (!$indexed) || $field->isIndexed ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   656
                $result[ $field->name ] = $field->name;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   657
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   658
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   659
        return $result;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   660
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   661
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   662
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   663
     * Returns array of FieldInfo objects.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   664
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   665
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   666
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   667
    public function getFieldInfos()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   668
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   669
        return $this->_fields;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   670
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   671
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   672
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   673
     * Returns actual deletions file generation number.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   674
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   675
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   676
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   677
    public function getDelGen()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   678
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   679
        return $this->_delGen;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   680
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   681
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   682
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   683
     * Returns the total number of documents in this segment (including deleted documents).
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   684
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   685
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   686
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   687
    public function count()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   688
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   689
        return $this->_docCount;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   690
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   691
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   692
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   693
     * Returns number of deleted documents.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   694
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   695
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   696
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   697
    private function _deletedCount()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   698
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   699
        if ($this->_deleted === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   700
            return 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   701
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   702
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   703
        if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   704
            return count(bitset_to_array($this->_deleted));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   705
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   706
            return count($this->_deleted);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   707
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   708
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   709
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   710
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   711
     * Returns the total number of non-deleted documents in this segment.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   712
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   713
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   714
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   715
    public function numDocs()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   716
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   717
        if ($this->hasDeletions()) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   718
            return $this->_docCount - $this->_deletedCount();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   719
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   720
            return $this->_docCount;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   721
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   722
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   723
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   724
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   725
     * Get field position in a fields dictionary
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   726
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   727
     * @param integer $fieldNum
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   728
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   729
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   730
    private function _getFieldPosition($fieldNum) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   731
        // Treat values which are not in a translation table as a 'direct value'
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   732
        return isset($this->_fieldsDicPositions[$fieldNum]) ?
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   733
                           $this->_fieldsDicPositions[$fieldNum] : $fieldNum;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   734
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   735
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   736
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   737
     * Return segment name
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   738
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   739
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   740
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   741
    public function getName()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   742
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   743
        return $this->_name;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   744
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   745
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   746
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   747
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   748
     * TermInfo cache
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   749
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   750
     * Size is 1024.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   751
     * Numbers are used instead of class constants because of performance considerations
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   752
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   753
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   754
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   755
    private $_termInfoCache = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   756
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   757
    private function _cleanUpTermInfoCache()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   758
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   759
        // Clean 256 term infos
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   760
        foreach ($this->_termInfoCache as $key => $termInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   761
            unset($this->_termInfoCache[$key]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   762
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   763
            // leave 768 last used term infos
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   764
            if (count($this->_termInfoCache) == 768) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   765
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   766
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   767
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   768
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   769
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   770
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   771
     * Load terms dictionary index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   772
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   773
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   774
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   775
    private function _loadDictionaryIndex()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   776
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   777
        // Check, if index is already serialized
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   778
        if ($this->_directory->fileExists($this->_name . '.sti')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   779
            // Load serialized dictionary index data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   780
            $stiFile = $this->_directory->getFileObject($this->_name . '.sti');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   781
            $stiFileData = $stiFile->readBytes($this->_directory->fileLength($this->_name . '.sti'));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   782
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   783
            // Load dictionary index data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   784
            if (($unserializedData = @unserialize($stiFileData)) !== false) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   785
                list($this->_termDictionary, $this->_termDictionaryInfos) = $unserializedData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   786
                return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   787
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   788
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   789
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   790
        // Load data from .tii file and generate .sti file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   791
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   792
        // Prefetch dictionary index data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   793
        $tiiFile = $this->openCompoundFile('.tii');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   794
        $tiiFileData = $tiiFile->readBytes($this->compoundFileLength('.tii'));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   795
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   796
        /** Zend_Search_Lucene_Index_DictionaryLoader */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   797
        require_once 'Zend/Search/Lucene/Index/DictionaryLoader.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   798
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   799
        // Load dictionary index data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   800
        list($this->_termDictionary, $this->_termDictionaryInfos) =
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   801
                    Zend_Search_Lucene_Index_DictionaryLoader::load($tiiFileData);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   802
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   803
        $stiFileData = serialize(array($this->_termDictionary, $this->_termDictionaryInfos));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   804
        $stiFile = $this->_directory->createFile($this->_name . '.sti');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   805
        $stiFile->writeBytes($stiFileData);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   806
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   807
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   808
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   809
     * Scans terms dictionary and returns term info
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   810
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   811
     * @param Zend_Search_Lucene_Index_Term $term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   812
     * @return Zend_Search_Lucene_Index_TermInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   813
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   814
    public function getTermInfo(Zend_Search_Lucene_Index_Term $term)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   815
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   816
        $termKey = $term->key();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   817
        if (isset($this->_termInfoCache[$termKey])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   818
            $termInfo = $this->_termInfoCache[$termKey];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   819
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   820
            // Move termInfo to the end of cache
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   821
            unset($this->_termInfoCache[$termKey]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   822
            $this->_termInfoCache[$termKey] = $termInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   823
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   824
            return $termInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   825
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   826
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   827
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   828
        if ($this->_termDictionary === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   829
            $this->_loadDictionaryIndex();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   830
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   831
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   832
        $searchField = $this->getFieldNum($term->field);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   833
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   834
        if ($searchField == -1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   835
            return null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   836
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   837
        $searchDicField = $this->_getFieldPosition($searchField);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   838
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   839
        // search for appropriate value in dictionary
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   840
        $lowIndex = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   841
        $highIndex = count($this->_termDictionary)-1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   842
        while ($highIndex >= $lowIndex) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   843
            // $mid = ($highIndex - $lowIndex)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   844
            $mid = ($highIndex + $lowIndex) >> 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   845
            $midTerm = $this->_termDictionary[$mid];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   846
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   847
            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   848
            $delta = $searchDicField - $fieldNum;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   849
            if ($delta == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   850
                $delta = strcmp($term->text, $midTerm[1] /* text */);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   851
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   852
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   853
            if ($delta < 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   854
                $highIndex = $mid-1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   855
            } elseif ($delta > 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   856
                $lowIndex  = $mid+1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   857
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   858
                // return $this->_termDictionaryInfos[$mid]; // We got it!
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   859
                $a = $this->_termDictionaryInfos[$mid];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   860
                $termInfo = new Zend_Search_Lucene_Index_TermInfo($a[0], $a[1], $a[2], $a[3], $a[4]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   861
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   862
                // Put loaded termInfo into cache
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   863
                $this->_termInfoCache[$termKey] = $termInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   864
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   865
                return $termInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   866
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   867
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   868
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   869
        if ($highIndex == -1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   870
            // Term is out of the dictionary range
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   871
            return null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   872
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   873
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   874
        $prevPosition = $highIndex;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   875
        $prevTerm = $this->_termDictionary[$prevPosition];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   876
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   877
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   878
        $tisFile = $this->openCompoundFile('.tis');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   879
        $tiVersion = $tisFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   880
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   881
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   882
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   883
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   884
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   885
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   886
        $termCount     = $tisFile->readLong();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   887
        $indexInterval = $tisFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   888
        $skipInterval  = $tisFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   889
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   890
            $maxSkipLevels = $tisFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   891
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   892
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   893
        $tisFile->seek($prevTermInfo[4] /* indexPointer */ - (($tiVersion == (int)0xFFFFFFFD)? 24 : 20) /* header size*/, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   894
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   895
        $termValue    = $prevTerm[1] /* text */;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   896
        $termFieldNum = $prevTerm[0] /* field */;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   897
        $freqPointer = $prevTermInfo[1] /* freqPointer */;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   898
        $proxPointer = $prevTermInfo[2] /* proxPointer */;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   899
        for ($count = $prevPosition*$indexInterval + 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   900
             $count <= $termCount &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   901
             ( $this->_getFieldPosition($termFieldNum) < $searchDicField ||
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   902
              ($this->_getFieldPosition($termFieldNum) == $searchDicField &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   903
               strcmp($termValue, $term->text) < 0) );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   904
             $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   905
            $termPrefixLength = $tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   906
            $termSuffix       = $tisFile->readString();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   907
            $termFieldNum     = $tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   908
            $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($termValue, $termPrefixLength) . $termSuffix;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   909
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   910
            $docFreq      = $tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   911
            $freqPointer += $tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   912
            $proxPointer += $tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   913
            if( $docFreq >= $skipInterval ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   914
                $skipOffset = $tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   915
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   916
                $skipOffset = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   917
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   918
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   919
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   920
        if ($termFieldNum == $searchField && $termValue == $term->text) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   921
            $termInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   922
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   923
            $termInfo = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   924
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   925
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   926
        // Put loaded termInfo into cache
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   927
        $this->_termInfoCache[$termKey] = $termInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   928
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   929
        if (count($this->_termInfoCache) == 1024) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   930
            $this->_cleanUpTermInfoCache();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   931
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   932
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   933
        return $termInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   934
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   935
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   936
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   937
     * Returns IDs of all the documents containing term.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   938
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   939
     * @param Zend_Search_Lucene_Index_Term $term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   940
     * @param integer $shift
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   941
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   942
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   943
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   944
    public function termDocs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   945
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   946
        $termInfo = $this->getTermInfo($term);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   947
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   948
        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   949
            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   950
                $docsFilter->segmentFilters[$this->_name] = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   951
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   952
            return array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   953
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   954
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   955
        $frqFile = $this->openCompoundFile('.frq');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   956
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   957
        $docId  = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   958
        $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   959
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   960
        if ($docsFilter !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   961
            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   962
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   963
                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   964
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   965
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   966
            if (isset($docsFilter->segmentFilters[$this->_name])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   967
                // Filter already has some data for the current segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   968
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   969
                // Make short name for the filter (which doesn't need additional dereferencing)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   970
                $filter = &$docsFilter->segmentFilters[$this->_name];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   971
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   972
                // Check if filter is not empty
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   973
                if (count($filter) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   974
                    return array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   975
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   976
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   977
                if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   978
                    // Perform fetching
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   979
// ---------------------------------------------------------------
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   980
                    $updatedFilterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   981
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   982
                    for( $count=0; $count < $termInfo->docFreq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   983
                        $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   984
                        if( $docDelta % 2 == 1 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   985
                            $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   986
                        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   987
                            $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   988
                            // read freq
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   989
                            $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   990
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   991
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   992
                        if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   993
                           $result[] = $shift + $docId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   994
                           $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   995
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   996
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   997
                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   998
// ---------------------------------------------------------------
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   999
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1000
                    // Perform full scan
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1001
                    $updatedFilterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1002
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1003
                    for( $count=0; $count < $termInfo->docFreq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1004
                        $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1005
                        if( $docDelta % 2 == 1 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1006
                            $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1007
                        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1008
                            $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1009
                            // read freq
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1010
                            $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1011
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1012
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1013
                        if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1014
                           $result[] = $shift + $docId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1015
                           $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1016
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1017
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1018
                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1019
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1020
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1021
                // Filter is present, but doesn't has data for the current segment yet
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1022
                $filterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1023
                for( $count=0; $count < $termInfo->docFreq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1024
                    $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1025
                    if( $docDelta % 2 == 1 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1026
                        $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1027
                    } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1028
                        $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1029
                        // read freq
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1030
                        $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1031
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1032
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1033
                    $result[] = $shift + $docId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1034
                    $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1035
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1036
                $docsFilter->segmentFilters[$this->_name] = $filterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1037
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1038
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1039
            for( $count=0; $count < $termInfo->docFreq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1040
                $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1041
                if( $docDelta % 2 == 1 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1042
                    $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1043
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1044
                    $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1045
                    // read freq
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1046
                    $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1047
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1048
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1049
                $result[] = $shift + $docId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1050
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1051
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1052
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1053
        return $result;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1054
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1055
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1056
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1057
     * Returns term freqs array.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1058
     * Result array structure: array(docId => freq, ...)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1059
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1060
     * @param Zend_Search_Lucene_Index_Term $term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1061
     * @param integer $shift
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1062
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1063
     * @return Zend_Search_Lucene_Index_TermInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1064
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1065
    public function termFreqs(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1066
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1067
        $termInfo = $this->getTermInfo($term);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1068
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1069
        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1070
            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1071
                $docsFilter->segmentFilters[$this->_name] = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1072
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1073
            return array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1074
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1075
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1076
        $frqFile = $this->openCompoundFile('.frq');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1077
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1078
        $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1079
        $docId = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1080
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1081
        $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1082
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1083
        if ($docsFilter !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1084
            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1085
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1086
                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1087
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1088
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1089
            if (isset($docsFilter->segmentFilters[$this->_name])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1090
                // Filter already has some data for the current segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1091
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1092
                // Make short name for the filter (which doesn't need additional dereferencing)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1093
                $filter = &$docsFilter->segmentFilters[$this->_name];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1094
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1095
                // Check if filter is not empty
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1096
                if (count($filter) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1097
                    return array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1098
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1099
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1100
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1101
                if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1102
                    // Perform fetching
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1103
// ---------------------------------------------------------------
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1104
                    $updatedFilterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1105
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1106
                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1107
                        $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1108
                        if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1109
                            $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1110
                            if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1111
                                $result[$shift + $docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1112
                                $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1113
                            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1114
                        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1115
                            $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1116
                            $freq = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1117
                            if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1118
                                $result[$shift + $docId] = $freq;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1119
                                $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1120
                            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1121
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1122
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1123
                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1124
// ---------------------------------------------------------------
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1125
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1126
                    // Perform full scan
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1127
                    $updatedFilterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1128
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1129
                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1130
                        $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1131
                        if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1132
                            $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1133
                            if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1134
                                $result[$shift + $docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1135
                                $updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1136
                            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1137
                        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1138
                            $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1139
                            $freq = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1140
                            if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1141
                                $result[$shift + $docId] = $freq;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1142
                                $updatedFilterData[$docId] = 1; // 1 is just some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1143
                            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1144
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1145
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1146
                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1147
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1148
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1149
                // Filter doesn't has data for current segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1150
                $filterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1151
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1152
                for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1153
                    $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1154
                    if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1155
                        $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1156
                        $result[$shift + $docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1157
                        $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1158
                    } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1159
                        $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1160
                        $result[$shift + $docId] = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1161
                        $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1162
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1163
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1164
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1165
                $docsFilter->segmentFilters[$this->_name] = $filterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1166
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1167
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1168
            for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1169
                $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1170
                if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1171
                    $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1172
                    $result[$shift + $docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1173
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1174
                    $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1175
                    $result[$shift + $docId] = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1176
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1177
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1178
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1179
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1180
        return $result;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1181
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1182
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1183
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1184
     * Returns term positions array.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1185
     * Result array structure: array(docId => array(pos1, pos2, ...), ...)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1186
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1187
     * @param Zend_Search_Lucene_Index_Term $term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1188
     * @param integer $shift
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1189
     * @param Zend_Search_Lucene_Index_DocsFilter|null $docsFilter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1190
     * @return Zend_Search_Lucene_Index_TermInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1191
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1192
    public function termPositions(Zend_Search_Lucene_Index_Term $term, $shift = 0, $docsFilter = null)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1193
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1194
        $termInfo = $this->getTermInfo($term);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1195
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1196
        if (!$termInfo instanceof Zend_Search_Lucene_Index_TermInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1197
            if ($docsFilter !== null  &&  $docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1198
                $docsFilter->segmentFilters[$this->_name] = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1199
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1200
            return array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1201
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1202
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1203
        $frqFile = $this->openCompoundFile('.frq');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1204
        $frqFile->seek($termInfo->freqPointer,SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1205
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1206
        $docId = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1207
        $freqs = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1208
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1209
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1210
        if ($docsFilter !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1211
            if (!$docsFilter instanceof Zend_Search_Lucene_Index_DocsFilter) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1212
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1213
                throw new Zend_Search_Lucene_Exception('Documents filter must be an instance of Zend_Search_Lucene_Index_DocsFilter or null.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1214
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1215
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1216
            if (isset($docsFilter->segmentFilters[$this->_name])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1217
                // Filter already has some data for the current segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1218
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1219
                // Make short name for the filter (which doesn't need additional dereferencing)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1220
                $filter = &$docsFilter->segmentFilters[$this->_name];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1221
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1222
                // Check if filter is not empty
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1223
                if (count($filter) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1224
                    return array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1225
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1226
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1227
                if ($this->_docCount/count($filter) < self::FULL_SCAN_VS_FETCH_BOUNDARY) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1228
                    // Perform fetching
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1229
// ---------------------------------------------------------------
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1230
                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1231
                        $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1232
                        if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1233
                            $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1234
                            $freqs[$docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1235
                        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1236
                            $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1237
                            $freqs[$docId] = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1238
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1239
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1240
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1241
                    $updatedFilterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1242
                    $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1243
                    $prxFile = $this->openCompoundFile('.prx');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1244
                    $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1245
                    foreach ($freqs as $docId => $freq) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1246
                        $termPosition = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1247
                        $positions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1248
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1249
                        // we have to read .prx file to get right position for next doc
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1250
                        // even filter doesn't match current document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1251
                        for ($count = 0; $count < $freq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1252
                            $termPosition += $prxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1253
                            $positions[] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1254
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1255
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1256
                        // Include into updated filter and into result only if doc is matched by filter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1257
                        if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1258
                            $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1259
                            $result[$shift + $docId] = $positions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1260
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1261
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1262
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1263
                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1264
// ---------------------------------------------------------------
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1265
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1266
                    // Perform full scan
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1267
                    for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1268
                        $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1269
                        if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1270
                            $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1271
                            $freqs[$docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1272
                        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1273
                            $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1274
                            $freqs[$docId] = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1275
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1276
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1277
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1278
                    $updatedFilterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1279
                    $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1280
                    $prxFile = $this->openCompoundFile('.prx');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1281
                    $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1282
                    foreach ($freqs as $docId => $freq) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1283
                        $termPosition = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1284
                        $positions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1285
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1286
                        // we have to read .prx file to get right position for next doc
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1287
                        // even filter doesn't match current document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1288
                        for ($count = 0; $count < $freq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1289
                            $termPosition += $prxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1290
                            $positions[] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1291
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1292
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1293
                        // Include into updated filter and into result only if doc is matched by filter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1294
                        if (isset($filter[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1295
                            $updatedFilterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1296
                            $result[$shift + $docId] = $positions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1297
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1298
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1299
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1300
                    $docsFilter->segmentFilters[$this->_name] = $updatedFilterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1301
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1302
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1303
                // Filter doesn't has data for current segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1304
                for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1305
                    $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1306
                    if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1307
                        $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1308
                        $freqs[$docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1309
                    } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1310
                        $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1311
                        $freqs[$docId] = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1312
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1313
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1314
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1315
                $filterData = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1316
                $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1317
                $prxFile = $this->openCompoundFile('.prx');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1318
                $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1319
                foreach ($freqs as $docId => $freq) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1320
                    $filterData[$docId] = 1; // 1 is just a some constant value, so we don't need additional var dereference here
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1321
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1322
                    $termPosition = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1323
                    $positions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1324
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1325
                    for ($count = 0; $count < $freq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1326
                        $termPosition += $prxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1327
                        $positions[] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1328
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1329
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1330
                    $result[$shift + $docId] = $positions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1331
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1332
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1333
                $docsFilter->segmentFilters[$this->_name] = $filterData;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1334
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1335
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1336
            for ($count = 0; $count < $termInfo->docFreq; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1337
                $docDelta = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1338
                if ($docDelta % 2 == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1339
                    $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1340
                    $freqs[$docId] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1341
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1342
                    $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1343
                    $freqs[$docId] = $frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1344
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1345
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1346
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1347
            $result = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1348
            $prxFile = $this->openCompoundFile('.prx');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1349
            $prxFile->seek($termInfo->proxPointer, SEEK_CUR);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1350
            foreach ($freqs as $docId => $freq) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1351
                $termPosition = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1352
                $positions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1353
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1354
                for ($count = 0; $count < $freq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1355
                    $termPosition += $prxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1356
                    $positions[] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1357
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1358
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1359
                $result[$shift + $docId] = $positions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1360
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1361
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1362
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1363
        return $result;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1364
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1365
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1366
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1367
     * Load normalizatin factors from an index file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1368
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1369
     * @param integer $fieldNum
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1370
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1371
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1372
    private function _loadNorm($fieldNum)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1373
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1374
        if ($this->_hasSingleNormFile) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1375
            $normfFile = $this->openCompoundFile('.nrm');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1376
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1377
            $header              = $normfFile->readBytes(3);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1378
            $headerFormatVersion = $normfFile->readByte();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1379
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1380
            if ($header != 'NRM'  ||  $headerFormatVersion != (int)0xFF) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1381
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1382
                throw new  Zend_Search_Lucene_Exception('Wrong norms file format.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1383
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1384
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1385
            foreach ($this->_fields as $fNum => $fieldInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1386
                if ($fieldInfo->isIndexed) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1387
                    $this->_norms[$fNum] = $normfFile->readBytes($this->_docCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1388
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1389
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1390
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1391
            $fFile = $this->openCompoundFile('.f' . $fieldNum);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1392
            $this->_norms[$fieldNum] = $fFile->readBytes($this->_docCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1393
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1394
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1395
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1396
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1397
     * Returns normalization factor for specified documents
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1398
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1399
     * @param integer $id
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1400
     * @param string $fieldName
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1401
     * @return float
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1402
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1403
    public function norm($id, $fieldName)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1404
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1405
        $fieldNum = $this->getFieldNum($fieldName);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1406
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1407
        if ( !($this->_fields[$fieldNum]->isIndexed) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1408
            return null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1409
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1410
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1411
        if (!isset($this->_norms[$fieldNum])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1412
            $this->_loadNorm($fieldNum);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1413
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1414
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1415
        return Zend_Search_Lucene_Search_Similarity::decodeNorm( ord($this->_norms[$fieldNum][$id]) );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1416
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1417
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1418
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1419
     * Returns norm vector, encoded in a byte string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1420
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1421
     * @param string $fieldName
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1422
     * @return string
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1423
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1424
    public function normVector($fieldName)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1425
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1426
        $fieldNum = $this->getFieldNum($fieldName);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1427
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1428
        if ($fieldNum == -1  ||  !($this->_fields[$fieldNum]->isIndexed)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1429
            $similarity = Zend_Search_Lucene_Search_Similarity::getDefault();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1430
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1431
            return str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )),
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1432
                              $this->_docCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1433
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1434
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1435
        if (!isset($this->_norms[$fieldNum])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1436
            $this->_loadNorm($fieldNum);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1437
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1438
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1439
        return $this->_norms[$fieldNum];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1440
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1441
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1442
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1443
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1444
     * Returns true if any documents have been deleted from this index segment.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1445
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1446
     * @return boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1447
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1448
    public function hasDeletions()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1449
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1450
        return $this->_deleted !== null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1451
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1452
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1453
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1454
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1455
     * Returns true if segment has single norms file.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1456
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1457
     * @return boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1458
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1459
    public function hasSingleNormFile()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1460
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1461
        return $this->_hasSingleNormFile ? true : false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1462
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1463
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1464
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1465
     * Returns true if segment is stored using compound segment file.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1466
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1467
     * @return boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1468
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1469
    public function isCompound()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1470
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1471
        return $this->_isCompound;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1472
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1473
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1474
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1475
     * Deletes a document from the index segment.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1476
     * $id is an internal document id
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1477
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1478
     * @param integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1479
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1480
    public function delete($id)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1481
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1482
        $this->_deletedDirty = true;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1483
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1484
        if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1485
            if ($this->_deleted === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1486
                $this->_deleted = bitset_empty($id);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1487
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1488
            bitset_incl($this->_deleted, $id);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1489
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1490
            if ($this->_deleted === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1491
                $this->_deleted = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1492
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1493
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1494
            $this->_deleted[$id] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1495
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1496
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1497
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1498
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1499
     * Checks, that document is deleted
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1500
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1501
     * @param integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1502
     * @return boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1503
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1504
    public function isDeleted($id)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1505
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1506
        if ($this->_deleted === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1507
            return false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1508
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1509
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1510
        if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1511
            return bitset_in($this->_deleted, $id);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1512
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1513
            return isset($this->_deleted[$id]);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1514
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1515
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1516
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1517
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1518
     * Detect latest delete generation
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1519
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1520
     * Is actualy used from writeChanges() method or from the constructor if it's invoked from
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1521
     * Index writer. In both cases index write lock is already obtained, so we shouldn't care
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1522
     * about it
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1523
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1524
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1525
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1526
    private function _detectLatestDelGen()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1527
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1528
        $delFileList = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1529
        foreach ($this->_directory->fileList() as $file) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1530
            if ($file == $this->_name . '.del') {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1531
                // Matches <segment_name>.del file name
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1532
                $delFileList[] = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1533
            } else if (preg_match('/^' . $this->_name . '_([a-zA-Z0-9]+)\.del$/i', $file, $matches)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1534
                // Matches <segment_name>_NNN.del file names
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1535
                $delFileList[] = (int)base_convert($matches[1], 36, 10);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1536
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1537
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1538
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1539
        if (count($delFileList) == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1540
            // There is no deletions file for current segment in the directory
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1541
            // Set deletions file generation number to 1
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1542
            return -1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1543
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1544
            // There are some deletions files for current segment in the directory
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1545
            // Set deletions file generation number to the highest nuber
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1546
            return max($delFileList);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1547
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1548
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1549
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1550
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1551
     * Write changes if it's necessary.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1552
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1553
     * This method must be invoked only from the Writer _updateSegments() method,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1554
     * so index Write lock has to be already obtained.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1555
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1556
     * @internal
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1557
     * @throws Zend_Search_Lucene_Exceptions
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1558
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1559
    public function writeChanges()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1560
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1561
        // Get new generation number
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1562
        $latestDelGen = $this->_detectLatestDelGen();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1563
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1564
        if (!$this->_deletedDirty) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1565
            // There was no deletions by current process
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1566
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1567
            if ($latestDelGen == $this->_delGen) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1568
                // Delete file hasn't been updated by any concurrent process
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1569
                return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1570
            } else if ($latestDelGen > $this->_delGen) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1571
                // Delete file has been updated by some concurrent process
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1572
                // Reload deletions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1573
                $this->_delGen  = $latestDelGen;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1574
                $this->_deleted = $this->_loadDelFile();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1575
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1576
                return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1577
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1578
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1579
                throw new Zend_Search_Lucene_Exception('Delete file processing workflow is corrupted for the segment \'' . $this->_name . '\'.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1580
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1581
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1582
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1583
        if ($latestDelGen > $this->_delGen) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1584
            // Merge current deletions with latest deletions file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1585
            $this->_delGen = $latestDelGen;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1586
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1587
            $latestDelete = $this->_loadDelFile();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1588
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1589
            if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1590
                $this->_deleted = bitset_union($this->_deleted, $latestDelete);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1591
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1592
                $this->_deleted += $latestDelete;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1593
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1594
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1595
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1596
        if (extension_loaded('bitset')) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1597
            $delBytes = $this->_deleted;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1598
            $bitCount = count(bitset_to_array($delBytes));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1599
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1600
            $byteCount = floor($this->_docCount/8)+1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1601
            $delBytes = str_repeat(chr(0), $byteCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1602
            for ($count = 0; $count < $byteCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1603
                $byte = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1604
                for ($bit = 0; $bit < 8; $bit++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1605
                    if (isset($this->_deleted[$count*8 + $bit])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1606
                        $byte |= (1<<$bit);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1607
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1608
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1609
                $delBytes[$count] = chr($byte);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1610
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1611
            $bitCount = count($this->_deleted);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1612
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1613
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1614
        if ($this->_delGen == -1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1615
            // Set delete file generation number to 1
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1616
            $this->_delGen = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1617
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1618
            // Increase delete file generation number by 1
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1619
            $this->_delGen++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1620
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1621
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1622
        $delFile = $this->_directory->createFile($this->_name . '_' . base_convert($this->_delGen, 10, 36) . '.del');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1623
        $delFile->writeInt($this->_docCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1624
        $delFile->writeInt($bitCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1625
        $delFile->writeBytes($delBytes);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1626
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1627
        $this->_deletedDirty = false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1628
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1629
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1630
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1631
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1632
     * Term Dictionary File object for stream like terms reading
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1633
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1634
     * @var Zend_Search_Lucene_Storage_File
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1635
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1636
    private $_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1637
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1638
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1639
     * Actual offset of the .tis file data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1640
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1641
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1642
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1643
    private $_tisFileOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1644
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1645
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1646
     * Frequencies File object for stream like terms reading
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1647
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1648
     * @var Zend_Search_Lucene_Storage_File
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1649
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1650
    private $_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1651
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1652
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1653
     * Actual offset of the .frq file data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1654
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1655
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1656
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1657
    private $_frqFileOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1658
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1659
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1660
     * Positions File object for stream like terms reading
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1661
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1662
     * @var Zend_Search_Lucene_Storage_File
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1663
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1664
    private $_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1665
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1666
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1667
     * Actual offset of the .prx file in the compound file
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1668
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1669
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1670
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1671
    private $_prxFileOffset;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1672
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1673
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1674
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1675
     * Actual number of terms in term stream
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1676
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1677
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1678
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1679
    private $_termCount = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1680
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1681
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1682
     * Overall number of terms in term stream
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1683
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1684
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1685
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1686
    private $_termNum = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1687
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1688
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1689
     * Segment index interval
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1690
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1691
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1692
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1693
    private $_indexInterval;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1694
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1695
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1696
     * Segment skip interval
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1697
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1698
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1699
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1700
    private $_skipInterval;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1701
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1702
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1703
     * Last TermInfo in a terms stream
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1704
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1705
     * @var Zend_Search_Lucene_Index_TermInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1706
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1707
    private $_lastTermInfo = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1708
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1709
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1710
     * Last Term in a terms stream
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1711
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1712
     * @var Zend_Search_Lucene_Index_Term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1713
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1714
    private $_lastTerm = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1715
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1716
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1717
     * Map of the document IDs
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1718
     * Used to get new docID after removing deleted documents.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1719
     * It's not very effective from memory usage point of view,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1720
     * but much more faster, then other methods
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1721
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1722
     * @var array|null
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1723
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1724
    private $_docMap = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1725
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1726
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1727
     * An array of all term positions in the documents.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1728
     * Array structure: array( docId => array( pos1, pos2, ...), ...)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1729
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1730
     * Is set to null if term positions loading has to be skipped
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1731
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1732
     * @var array|null
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1733
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1734
    private $_lastTermPositions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1735
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1736
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1737
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1738
     * Terms scan mode
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1739
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1740
     * Values:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1741
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1742
     * self::SM_TERMS_ONLY - terms are scanned, no additional info is retrieved
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1743
     * self::SM_FULL_INFO  - terms are scanned, frequency and position info is retrieved
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1744
     * self::SM_MERGE_INFO - terms are scanned, frequency and position info is retrieved
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1745
     *                       document numbers are compacted (shifted if segment has deleted documents)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1746
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1747
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1748
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1749
    private $_termsScanMode;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1750
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1751
    /** Scan modes */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1752
    const SM_TERMS_ONLY = 0;    // terms are scanned, no additional info is retrieved
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1753
    const SM_FULL_INFO  = 1;    // terms are scanned, frequency and position info is retrieved
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1754
    const SM_MERGE_INFO = 2;    // terms are scanned, frequency and position info is retrieved
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1755
                                // document numbers are compacted (shifted if segment contains deleted documents)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1756
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1757
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1758
     * Reset terms stream
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1759
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1760
     * $startId - id for the fist document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1761
     * $compact - remove deleted documents
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1762
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1763
     * Returns start document id for the next segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1764
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1765
     * @param integer $startId
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1766
     * @param integer $mode
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1767
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1768
     * @return integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1769
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1770
    public function resetTermsStream(/** $startId = 0, $mode = self::SM_TERMS_ONLY */)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1771
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1772
        /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1773
         * SegmentInfo->resetTermsStream() method actually takes two optional parameters:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1774
         *   $startId (default value is 0)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1775
         *   $mode (default value is self::SM_TERMS_ONLY)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1776
         */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1777
        $argList = func_get_args();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1778
        if (count($argList) > 2) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1779
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1780
            throw new Zend_Search_Lucene_Exception('Wrong number of arguments');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1781
        } else if (count($argList) == 2) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1782
            $startId = $argList[0];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1783
            $mode    = $argList[1];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1784
        } else if (count($argList) == 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1785
            $startId = $argList[0];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1786
            $mode    = self::SM_TERMS_ONLY;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1787
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1788
            $startId = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1789
            $mode    = self::SM_TERMS_ONLY;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1790
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1791
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1792
        if ($this->_tisFile !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1793
            $this->_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1794
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1795
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1796
        $this->_tisFile = $this->openCompoundFile('.tis', false);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1797
        $this->_tisFileOffset = $this->_tisFile->tell();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1798
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1799
        $tiVersion = $this->_tisFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1800
        if ($tiVersion != (int)0xFFFFFFFE /* pre-2.1 format */  &&
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1801
            $tiVersion != (int)0xFFFFFFFD /* 2.1+ format    */) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1802
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1803
            throw new Zend_Search_Lucene_Exception('Wrong TermInfoFile file format');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1804
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1805
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1806
        $this->_termCount     =
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1807
              $this->_termNum = $this->_tisFile->readLong(); // Read terms count
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1808
        $this->_indexInterval = $this->_tisFile->readInt();  // Read Index interval
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1809
        $this->_skipInterval  = $this->_tisFile->readInt();  // Read skip interval
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1810
        if ($tiVersion == (int)0xFFFFFFFD /* 2.1+ format */) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1811
            $maxSkipLevels = $this->_tisFile->readInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1812
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1813
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1814
        if ($this->_frqFile !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1815
            $this->_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1816
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1817
        if ($this->_prxFile !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1818
            $this->_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1819
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1820
        $this->_docMap = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1821
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1822
        $this->_lastTerm          = new Zend_Search_Lucene_Index_Term('', -1);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1823
        $this->_lastTermInfo      = new Zend_Search_Lucene_Index_TermInfo(0, 0, 0, 0);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1824
        $this->_lastTermPositions = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1825
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1826
        $this->_termsScanMode = $mode;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1827
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1828
        switch ($mode) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1829
            case self::SM_TERMS_ONLY:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1830
                // Do nothing
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1831
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1832
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1833
            case self::SM_FULL_INFO:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1834
                // break intentionally omitted
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1835
            case self::SM_MERGE_INFO:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1836
                $this->_frqFile = $this->openCompoundFile('.frq', false);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1837
                $this->_frqFileOffset = $this->_frqFile->tell();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1838
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1839
                $this->_prxFile = $this->openCompoundFile('.prx', false);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1840
                $this->_prxFileOffset = $this->_prxFile->tell();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1841
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1842
                for ($count = 0; $count < $this->_docCount; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1843
                    if (!$this->isDeleted($count)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1844
                        $this->_docMap[$count] = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $count);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1845
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1846
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1847
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1848
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1849
            default:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1850
                require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1851
                throw new Zend_Search_Lucene_Exception('Wrong terms scaning mode specified.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1852
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1853
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1854
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1855
        // Calculate next segment start id (since $this->_docMap structure may be cleaned by $this->nextTerm() call)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1856
        $nextSegmentStartId = $startId + (($mode == self::SM_MERGE_INFO) ? count($this->_docMap) : $this->_docCount);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1857
        $this->nextTerm();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1858
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1859
        return $nextSegmentStartId;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1860
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1861
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1862
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1863
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1864
     * Skip terms stream up to the specified term preffix.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1865
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1866
     * Prefix contains fully specified field info and portion of searched term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1867
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1868
     * @param Zend_Search_Lucene_Index_Term $prefix
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1869
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1870
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1871
    public function skipTo(Zend_Search_Lucene_Index_Term $prefix)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1872
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1873
        if ($this->_termDictionary === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1874
            $this->_loadDictionaryIndex();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1875
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1876
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1877
        $searchField = $this->getFieldNum($prefix->field);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1878
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1879
        if ($searchField == -1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1880
            /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1881
             * Field is not presented in this segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1882
             * Go to the end of dictionary
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1883
             */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1884
            $this->_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1885
            $this->_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1886
            $this->_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1887
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1888
            $this->_lastTerm          = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1889
            $this->_lastTermInfo      = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1890
            $this->_lastTermPositions = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1891
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1892
            return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1893
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1894
        $searchDicField = $this->_getFieldPosition($searchField);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1895
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1896
        // search for appropriate value in dictionary
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1897
        $lowIndex = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1898
        $highIndex = count($this->_termDictionary)-1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1899
        while ($highIndex >= $lowIndex) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1900
            // $mid = ($highIndex - $lowIndex)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1901
            $mid = ($highIndex + $lowIndex) >> 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1902
            $midTerm = $this->_termDictionary[$mid];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1903
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1904
            $fieldNum = $this->_getFieldPosition($midTerm[0] /* field */);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1905
            $delta = $searchDicField - $fieldNum;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1906
            if ($delta == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1907
                $delta = strcmp($prefix->text, $midTerm[1] /* text */);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1908
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1909
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1910
            if ($delta < 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1911
                $highIndex = $mid-1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1912
            } elseif ($delta > 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1913
                $lowIndex  = $mid+1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1914
            } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1915
                // We have reached term we are looking for
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1916
                break;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1917
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1918
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1919
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1920
        if ($highIndex == -1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1921
            // Term is out of the dictionary range
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1922
            $this->_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1923
            $this->_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1924
            $this->_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1925
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1926
            $this->_lastTerm          = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1927
            $this->_lastTermInfo      = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1928
            $this->_lastTermPositions = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1929
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1930
            return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1931
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1932
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1933
        $prevPosition = $highIndex;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1934
        $prevTerm = $this->_termDictionary[$prevPosition];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1935
        $prevTermInfo = $this->_termDictionaryInfos[$prevPosition];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1936
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1937
        if ($this->_tisFile === null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1938
            // The end of terms stream is reached and terms dictionary file is closed
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1939
            // Perform mini-reset operation
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1940
            $this->_tisFile = $this->openCompoundFile('.tis', false);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1941
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1942
            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1943
                $this->_frqFile = $this->openCompoundFile('.frq', false);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1944
                $this->_prxFile = $this->openCompoundFile('.prx', false);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1945
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1946
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1947
        $this->_tisFile->seek($this->_tisFileOffset + $prevTermInfo[4], SEEK_SET);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1948
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1949
        $this->_lastTerm     = new Zend_Search_Lucene_Index_Term($prevTerm[1] /* text */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1950
                                                                 ($prevTerm[0] == -1) ? '' : $this->_fields[$prevTerm[0] /* field */]->name);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1951
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($prevTermInfo[0] /* docFreq */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1952
                                                                     $prevTermInfo[1] /* freqPointer */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1953
                                                                     $prevTermInfo[2] /* proxPointer */,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1954
                                                                     $prevTermInfo[3] /* skipOffset */);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1955
        $this->_termCount  =  $this->_termNum - $prevPosition*$this->_indexInterval;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1956
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1957
        if ($highIndex == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1958
            // skip start entry
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1959
            $this->nextTerm();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1960
        } else if ($prefix->field == $this->_lastTerm->field  &&  $prefix->text  == $this->_lastTerm->text) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1961
            // We got exact match in the dictionary index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1962
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1963
            if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1964
                $this->_lastTermPositions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1965
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1966
                $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1967
                $freqs = array();   $docId = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1968
                for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1969
                    $docDelta = $this->_frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1970
                    if( $docDelta % 2 == 1 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1971
                        $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1972
                        $freqs[ $docId ] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1973
                    } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1974
                        $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1975
                        $freqs[ $docId ] = $this->_frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1976
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1977
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1978
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1979
                $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1980
                foreach ($freqs as $docId => $freq) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1981
                    $termPosition = 0;  $positions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1982
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1983
                    for ($count = 0; $count < $freq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1984
                        $termPosition += $this->_prxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1985
                        $positions[] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1986
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1987
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1988
                    if (isset($this->_docMap[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1989
                        $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1990
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1991
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1992
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1993
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1994
            return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1995
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1996
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1997
        // Search term matching specified prefix
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1998
        while ($this->_lastTerm !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  1999
            if ( strcmp($this->_lastTerm->field, $prefix->field) > 0  ||
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2000
                 ($prefix->field == $this->_lastTerm->field  &&  strcmp($this->_lastTerm->text, $prefix->text) >= 0) ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2001
                    // Current term matches or greate than the pattern
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2002
                    return;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2003
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2004
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2005
            $this->nextTerm();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2006
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2007
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2008
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2009
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2010
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2011
     * Scans terms dictionary and returns next term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2012
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2013
     * @return Zend_Search_Lucene_Index_Term|null
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2014
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2015
    public function nextTerm()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2016
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2017
        if ($this->_tisFile === null  ||  $this->_termCount == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2018
            $this->_lastTerm          = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2019
            $this->_lastTermInfo      = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2020
            $this->_lastTermPositions = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2021
            $this->_docMap            = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2022
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2023
            // may be necessary for "empty" segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2024
            $this->_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2025
            $this->_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2026
            $this->_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2027
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2028
            return null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2029
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2030
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2031
        $termPrefixLength = $this->_tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2032
        $termSuffix       = $this->_tisFile->readString();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2033
        $termFieldNum     = $this->_tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2034
        $termValue        = Zend_Search_Lucene_Index_Term::getPrefix($this->_lastTerm->text, $termPrefixLength) . $termSuffix;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2035
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2036
        $this->_lastTerm = new Zend_Search_Lucene_Index_Term($termValue, $this->_fields[$termFieldNum]->name);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2037
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2038
        $docFreq     = $this->_tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2039
        $freqPointer = $this->_lastTermInfo->freqPointer + $this->_tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2040
        $proxPointer = $this->_lastTermInfo->proxPointer + $this->_tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2041
        if ($docFreq >= $this->_skipInterval) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2042
            $skipOffset = $this->_tisFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2043
        } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2044
            $skipOffset = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2045
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2046
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2047
        $this->_lastTermInfo = new Zend_Search_Lucene_Index_TermInfo($docFreq, $freqPointer, $proxPointer, $skipOffset);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2048
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2049
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2050
        if ($this->_termsScanMode == self::SM_FULL_INFO  ||  $this->_termsScanMode == self::SM_MERGE_INFO) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2051
            $this->_lastTermPositions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2052
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2053
            $this->_frqFile->seek($this->_lastTermInfo->freqPointer + $this->_frqFileOffset, SEEK_SET);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2054
            $freqs = array();   $docId = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2055
            for( $count = 0; $count < $this->_lastTermInfo->docFreq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2056
                $docDelta = $this->_frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2057
                if( $docDelta % 2 == 1 ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2058
                    $docId += ($docDelta-1)/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2059
                    $freqs[ $docId ] = 1;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2060
                } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2061
                    $docId += $docDelta/2;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2062
                    $freqs[ $docId ] = $this->_frqFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2063
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2064
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2065
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2066
            $this->_prxFile->seek($this->_lastTermInfo->proxPointer + $this->_prxFileOffset, SEEK_SET);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2067
            foreach ($freqs as $docId => $freq) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2068
                $termPosition = 0;  $positions = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2069
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2070
                for ($count = 0; $count < $freq; $count++ ) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2071
                    $termPosition += $this->_prxFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2072
                    $positions[] = $termPosition;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2073
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2074
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2075
                if (isset($this->_docMap[$docId])) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2076
                    $this->_lastTermPositions[$this->_docMap[$docId]] = $positions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2077
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2078
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2079
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2080
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2081
        $this->_termCount--;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2082
        if ($this->_termCount == 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2083
            $this->_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2084
            $this->_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2085
            $this->_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2086
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2087
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2088
        return $this->_lastTerm;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2089
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2090
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2091
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2092
     * Close terms stream
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2093
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2094
     * Should be used for resources clean up if stream is not read up to the end
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2095
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2096
    public function closeTermsStream()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2097
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2098
        $this->_tisFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2099
        $this->_frqFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2100
        $this->_prxFile = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2101
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2102
        $this->_lastTerm          = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2103
        $this->_lastTermInfo      = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2104
        $this->_lastTermPositions = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2105
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2106
        $this->_docMap            = null;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2107
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2108
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2109
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2110
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2111
     * Returns term in current position
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2112
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2113
     * @return Zend_Search_Lucene_Index_Term|null
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2114
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2115
    public function currentTerm()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2116
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2117
        return $this->_lastTerm;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2118
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2119
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2120
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2121
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2122
     * Returns an array of all term positions in the documents.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2123
     * Return array structure: array( docId => array( pos1, pos2, ...), ...)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2124
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2125
     * @return array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2126
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2127
    public function currentTermPositions()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2128
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2129
        return $this->_lastTermPositions;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2130
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2131
}
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
  2132