web/ldt/text/annotindexer.py
author wakimd
Wed, 22 Dec 2010 12:01:05 +0100
changeset 25 c8dfd7ea87e5
parent 21 1a061f244254
permissions -rw-r--r--
Corrections on merge

from django.conf import settings
from models import *
import lucene
from ldt.text import STORE
from ldt.text import ANALYZER
import lxml.etree


class AnnotIndexer(object):
    """Feed text annotations to an index writer as Lucene documents.

    One document is built per annotation and passed to the writer given at
    construction time.  Both public indexing methods close that writer when
    they are done adding documents.
    """

    def __init__(self, annotList, writer):
        """Store the annotations to index and the writer receiving them.

        annotList -- iterable of annotation objects; each one is read
                     through ``external_id``, ``title``, ``description``,
                     ``text`` and ``get_tag_list()``.
        writer    -- object exposing ``addDocument(doc)`` and ``close()``
                     (presumably a ``lucene.IndexWriter`` -- confirm
                     against the caller).
        """
        self.__annotList = annotList
        self.__writer = writer

    def index_all(self):
        """Index every annotation of the list, then close the writer once.

        The writer must stay open until the last document has been added,
        so it is closed after the loop rather than after each annotation.
        When the list is empty the writer is left untouched.
        """
        added = False
        for annot in self.__annotList:
            self.__writer.addDocument(self._build_document(annot))
            added = True

        if added:
            self.__writer.close()

    def index_annotation(self, annotation):
        """Index a single annotation and close the writer afterwards."""
        self.__writer.addDocument(self._build_document(annotation))
        self.__writer.close()

    def _build_document(self, annotation):
        """Return the Lucene document describing ``annotation``.

        Only ``annotation_id`` is stored (and not analyzed); every other
        field is analyzed but not stored.  The ``all`` field concatenates
        tags, title, description and text, separated by single spaces.
        """
        store_no = lucene.Field.Store.NO
        analyzed = lucene.Field.Index.ANALYZED

        doc = lucene.Document()
        doc.add(lucene.Field("annotation_id", annotation.external_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))

        # Each tag is followed by ";" (the last one included); a missing or
        # empty tag list gives an empty string.
        annottags = annotation.get_tag_list()
        tags = ""
        if annottags is not None:
            tags = "".join(tag + ";" for tag in annottags)

        everything = " ".join([tags, annotation.title, annotation.description, annotation.text])

        doc.add(lucene.Field("type_doc", "text-annotation", store_no, analyzed))
        doc.add(lucene.Field("tags", tags, store_no, analyzed))
        doc.add(lucene.Field("title", annotation.title, store_no, analyzed))
        doc.add(lucene.Field("abstract", annotation.description, store_no, analyzed))
        doc.add(lucene.Field("text", annotation.text, store_no, analyzed))
        doc.add(lucene.Field("all", everything, store_no, analyzed))

        return doc