diff -r 5b37998e522e -r 162c1de6545a web/lib/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/web/lib/Zend/Search/Lucene/Index/SegmentWriter/DocumentWriter.php Fri Mar 11 15:05:35 2011 +0100 @@ -0,0 +1,230 @@ +_termDocs = array(); + $this->_termDictionary = array(); + } + + + /** + * Adds a document to this segment. + * + * @param Zend_Search_Lucene_Document $document + * @throws Zend_Search_Lucene_Exception + */ + public function addDocument(Zend_Search_Lucene_Document $document) + { + /** Zend_Search_Lucene_Search_Similarity */ + require_once 'Zend/Search/Lucene/Search/Similarity.php'; + + $storedFields = array(); + $docNorms = array(); + $similarity = Zend_Search_Lucene_Search_Similarity::getDefault(); + + foreach ($document->getFieldNames() as $fieldName) { + $field = $document->getField($fieldName); + + if ($field->storeTermVector) { + /** + * @todo term vector storing support + */ + require_once 'Zend/Search/Lucene/Exception.php'; + throw new Zend_Search_Lucene_Exception('Store term vector functionality is not supported yet.'); + } + + if ($field->isIndexed) { + if ($field->isTokenized) { + /** Zend_Search_Lucene_Analysis_Analyzer */ + require_once 'Zend/Search/Lucene/Analysis/Analyzer.php'; + + $analyzer = Zend_Search_Lucene_Analysis_Analyzer::getDefault(); + $analyzer->setInput($field->value, $field->encoding); + + $position = 0; + $tokenCounter = 0; + while (($token = $analyzer->nextToken()) !== null) { + $tokenCounter++; + + $term = new Zend_Search_Lucene_Index_Term($token->getTermText(), $field->name); + $termKey = $term->key(); + + if (!isset($this->_termDictionary[$termKey])) { + // New term + $this->_termDictionary[$termKey] = $term; + $this->_termDocs[$termKey] = array(); + $this->_termDocs[$termKey][$this->_docCount] = array(); + } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) { + // Existing term, but new term entry + $this->_termDocs[$termKey][$this->_docCount] = array(); + } + $position += $token->getPositionIncrement(); + $this->_termDocs[$termKey][$this->_docCount][] = $position; + } + + if ($tokenCounter == 0) { + // Field contains empty value. Treat it as non-indexed and non-tokenized + $field = clone($field); + $field->isIndexed = $field->isTokenized = false; + } else { + $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, + $tokenCounter)* + $document->boost* + $field->boost )); + } + } else if (($fieldUtf8Value = $field->getUtf8Value()) == '') { + // Field contains empty value. Treat it as non-indexed and non-tokenized + $field = clone($field); + $field->isIndexed = $field->isTokenized = false; + } else { + $term = new Zend_Search_Lucene_Index_Term($fieldUtf8Value, $field->name); + $termKey = $term->key(); + + if (!isset($this->_termDictionary[$termKey])) { + // New term + $this->_termDictionary[$termKey] = $term; + $this->_termDocs[$termKey] = array(); + $this->_termDocs[$termKey][$this->_docCount] = array(); + } else if (!isset($this->_termDocs[$termKey][$this->_docCount])) { + // Existing term, but new term entry + $this->_termDocs[$termKey][$this->_docCount] = array(); + } + $this->_termDocs[$termKey][$this->_docCount][] = 0; // position + + $docNorms[$field->name] = chr($similarity->encodeNorm( $similarity->lengthNorm($field->name, 1)* + $document->boost* + $field->boost )); + } + } + + if ($field->isStored) { + $storedFields[] = $field; + } + + $this->addField($field); + } + + foreach ($this->_fields as $fieldName => $field) { + if (!$field->isIndexed) { + continue; + } + + if (!isset($this->_norms[$fieldName])) { + $this->_norms[$fieldName] = str_repeat(chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )), + $this->_docCount); + } + + if (isset($docNorms[$fieldName])){ + $this->_norms[$fieldName] .= $docNorms[$fieldName]; + } else { + $this->_norms[$fieldName] .= chr($similarity->encodeNorm( $similarity->lengthNorm($fieldName, 0) )); + } + } + + $this->addStoredFields($storedFields); + } + + + /** + * Dump Term Dictionary (.tis) and Term Dictionary Index (.tii) segment files + */ + protected function _dumpDictionary() + { + ksort($this->_termDictionary, SORT_STRING); + + $this->initializeDictionaryFiles(); + + foreach ($this->_termDictionary as $termId => $term) { + $this->addTerm($term, $this->_termDocs[$termId]); + } + + $this->closeDictionaryFiles(); + } + + + /** + * Close segment, write it to disk and return segment info + * + * @return Zend_Search_Lucene_Index_SegmentInfo + */ + public function close() + { + if ($this->_docCount == 0) { + return null; + } + + $this->_dumpFNM(); + $this->_dumpDictionary(); + + $this->_generateCFS(); + + /** Zend_Search_Lucene_Index_SegmentInfo */ + require_once 'Zend/Search/Lucene/Index/SegmentInfo.php'; + + return new Zend_Search_Lucene_Index_SegmentInfo($this->_directory, + $this->_name, + $this->_docCount, + -1, + null, + true, + true); + } + +} +