web/lib/Zend/Search/Lucene/Index/SegmentMerger.php
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Mon, 19 Dec 2011 00:56:02 +0100
changeset 428 8b51d23b758f
parent 68 ecaf28ffe26e
child 207 621fa6caec0c
permissions -rw-r--r--
correct sync
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     1
<?php
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     2
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     3
 * Zend Framework
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     4
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     5
 * LICENSE
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     6
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     7
 * This source file is subject to the new BSD license that is bundled
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     8
 * with this package in the file LICENSE.txt.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
     9
 * It is also available through the world-wide-web at this URL:
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    10
 * http://framework.zend.com/license/new-bsd
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    11
 * If you did not receive a copy of the license and are unable to
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    12
 * obtain it through the world-wide-web, please send an email
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    13
 * to license@zend.com so we can send you a copy immediately.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    14
 *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    15
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    16
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    17
 * @subpackage Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    18
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    19
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    20
 * @version    $Id: SegmentMerger.php 20096 2010-01-06 02:05:09Z bkarwin $
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    21
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    22
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    23
/** Zend_Search_Lucene_Index_SegmentInfo */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    24
require_once 'Zend/Search/Lucene/Index/SegmentInfo.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    25
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    26
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    27
/**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    28
 * @category   Zend
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    29
 * @package    Zend_Search_Lucene
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    30
 * @subpackage Index
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    31
 * @copyright  Copyright (c) 2005-2010 Zend Technologies USA Inc. (http://www.zend.com)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    32
 * @license    http://framework.zend.com/license/new-bsd     New BSD License
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    33
 */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    34
class Zend_Search_Lucene_Index_SegmentMerger
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    35
{
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    36
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    37
     * Target segment writer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    38
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    39
     * @var Zend_Search_Lucene_Index_SegmentWriter_StreamWriter
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    40
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    41
    private $_writer;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    42
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    43
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    44
     * Number of docs in a new segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    45
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    46
     * @var integer
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    47
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    48
    private $_docCount;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    49
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    50
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    51
     * A set of segments to be merged
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    52
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    53
     * @var array Zend_Search_Lucene_Index_SegmentInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    54
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    55
    private $_segmentInfos = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    56
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    57
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    58
     * Flag to signal, that merge is already done
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    59
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    60
     * @var boolean
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    61
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    62
    private $_mergeDone = false;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    63
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    64
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    65
     * Field map
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    66
     * [<segment_name>][<field_number>] => <target_field_number>
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    67
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    68
     * @var array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    69
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    70
    private $_fieldsMap = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    71
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    72
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    73
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    74
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    75
     * Object constructor.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    76
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    77
     * Creates new segment merger with $directory as target to merge segments into
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    78
     * and $name as a name of new segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    79
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    80
     * @param Zend_Search_Lucene_Storage_Directory $directory
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    81
     * @param string $name
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    82
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    83
    public function __construct($directory, $name)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    84
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    85
        /** Zend_Search_Lucene_Index_SegmentWriter_StreamWriter */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    86
        require_once 'Zend/Search/Lucene/Index/SegmentWriter/StreamWriter.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    87
        $this->_writer = new Zend_Search_Lucene_Index_SegmentWriter_StreamWriter($directory, $name);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    88
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    89
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    90
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    91
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    92
     * Add segmnet to a collection of segments to be merged
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    93
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    94
     * @param Zend_Search_Lucene_Index_SegmentInfo $segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    95
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    96
    public function addSource(Zend_Search_Lucene_Index_SegmentInfo $segmentInfo)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    97
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    98
        $this->_segmentInfos[$segmentInfo->getName()] = $segmentInfo;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
    99
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   100
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   101
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   102
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   103
     * Do merge.
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   104
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   105
     * Returns number of documents in newly created segment
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   106
     *
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   107
     * @return Zend_Search_Lucene_Index_SegmentInfo
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   108
     * @throws Zend_Search_Lucene_Exception
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   109
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   110
    public function merge()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   111
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   112
        if ($this->_mergeDone) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   113
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   114
            throw new Zend_Search_Lucene_Exception('Merge is already done.');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   115
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   116
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   117
        if (count($this->_segmentInfos) < 1) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   118
            require_once 'Zend/Search/Lucene/Exception.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   119
            throw new Zend_Search_Lucene_Exception('Wrong number of segments to be merged ('
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   120
                                                 . count($this->_segmentInfos)
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   121
                                                 . ').');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   122
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   123
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   124
        $this->_mergeFields();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   125
        $this->_mergeNorms();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   126
        $this->_mergeStoredFields();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   127
        $this->_mergeTerms();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   128
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   129
        $this->_mergeDone = true;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   130
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   131
        return $this->_writer->close();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   132
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   133
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   134
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   135
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   136
     * Merge fields information
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   137
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   138
    private function _mergeFields()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   139
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   140
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   141
            foreach ($segmentInfo->getFieldInfos() as $fieldInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   142
                $this->_fieldsMap[$segName][$fieldInfo->number] = $this->_writer->addFieldInfo($fieldInfo);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   143
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   144
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   145
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   146
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   147
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   148
     * Merge field's normalization factors
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   149
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   150
    private function _mergeNorms()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   151
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   152
        foreach ($this->_writer->getFieldInfos() as $fieldInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   153
            if ($fieldInfo->isIndexed) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   154
                foreach ($this->_segmentInfos as $segName => $segmentInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   155
                    if ($segmentInfo->hasDeletions()) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   156
                        $srcNorm = $segmentInfo->normVector($fieldInfo->name);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   157
                        $norm    = '';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   158
                        $docs    = $segmentInfo->count();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   159
                        for ($count = 0; $count < $docs; $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   160
                            if (!$segmentInfo->isDeleted($count)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   161
                                $norm .= $srcNorm[$count];
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   162
                            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   163
                        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   164
                        $this->_writer->addNorm($fieldInfo->name, $norm);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   165
                    } else {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   166
                        $this->_writer->addNorm($fieldInfo->name, $segmentInfo->normVector($fieldInfo->name));
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   167
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   168
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   169
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   170
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   171
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   172
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   173
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   174
     * Merge fields information
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   175
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   176
    private function _mergeStoredFields()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   177
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   178
        $this->_docCount = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   179
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   180
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   181
            $fdtFile = $segmentInfo->openCompoundFile('.fdt');
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   182
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   183
            for ($count = 0; $count < $segmentInfo->count(); $count++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   184
                $fieldCount = $fdtFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   185
                $storedFields = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   186
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   187
                for ($count2 = 0; $count2 < $fieldCount; $count2++) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   188
                    $fieldNum = $fdtFile->readVInt();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   189
                    $bits = $fdtFile->readByte();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   190
                    $fieldInfo = $segmentInfo->getField($fieldNum);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   191
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   192
                    if (!($bits & 2)) { // Text data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   193
                        $storedFields[] =
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   194
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   195
                                                              $fdtFile->readString(),
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   196
                                                              'UTF-8',
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   197
                                                              true,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   198
                                                              $fieldInfo->isIndexed,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   199
                                                              $bits & 1 );
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   200
                    } else {            // Binary data
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   201
                        $storedFields[] =
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   202
                                 new Zend_Search_Lucene_Field($fieldInfo->name,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   203
                                                              $fdtFile->readBinary(),
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   204
                                                              '',
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   205
                                                              true,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   206
                                                              $fieldInfo->isIndexed,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   207
                                                              $bits & 1,
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   208
                                                              true);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   209
                    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   210
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   211
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   212
                if (!$segmentInfo->isDeleted($count)) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   213
                    $this->_docCount++;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   214
                    $this->_writer->addStoredFields($storedFields);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   215
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   216
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   217
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   218
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   219
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   220
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   221
    /**
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   222
     * Merge fields information
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   223
     */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   224
    private function _mergeTerms()
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   225
    {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   226
        /** Zend_Search_Lucene_Index_TermsPriorityQueue */
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   227
        require_once 'Zend/Search/Lucene/Index/TermsPriorityQueue.php';
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   228
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   229
        $segmentInfoQueue = new Zend_Search_Lucene_Index_TermsPriorityQueue();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   230
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   231
        $segmentStartId = 0;
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   232
        foreach ($this->_segmentInfos as $segName => $segmentInfo) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   233
            $segmentStartId = $segmentInfo->resetTermsStream($segmentStartId, Zend_Search_Lucene_Index_SegmentInfo::SM_MERGE_INFO);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   234
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   235
            // Skip "empty" segments
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   236
            if ($segmentInfo->currentTerm() !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   237
                $segmentInfoQueue->put($segmentInfo);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   238
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   239
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   240
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   241
        $this->_writer->initializeDictionaryFiles();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   242
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   243
        $termDocs = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   244
        while (($segmentInfo = $segmentInfoQueue->pop()) !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   245
            // Merge positions array
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   246
            $termDocs += $segmentInfo->currentTermPositions();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   247
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   248
            if ($segmentInfoQueue->top() === null ||
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   249
                $segmentInfoQueue->top()->currentTerm()->key() !=
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   250
                            $segmentInfo->currentTerm()->key()) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   251
                // We got new term
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   252
                ksort($termDocs, SORT_NUMERIC);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   253
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   254
                // Add term if it's contained in any document
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   255
                if (count($termDocs) > 0) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   256
                    $this->_writer->addTerm($segmentInfo->currentTerm(), $termDocs);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   257
                }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   258
                $termDocs = array();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   259
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   260
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   261
            $segmentInfo->nextTerm();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   262
            // check, if segment dictionary is finished
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   263
            if ($segmentInfo->currentTerm() !== null) {
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   264
                // Put segment back into the priority queue
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   265
                $segmentInfoQueue->put($segmentInfo);
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   266
            }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   267
        }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   268
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   269
        $this->_writer->closeDictionaryFiles();
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   270
    }
4eba9c11703f first import
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff changeset
   271
}