from django.conf import settings
from models import *
import lucene
from ldt.text import STORE
from ldt.text import ANALYZER
import lxml.etree
class AnnotIndexer(object):
    """Add text annotations to a Lucene index through a caller-supplied writer.

    Each annotation becomes one Lucene document. Only ``annotation_id`` is
    stored; ``type_doc``, ``tags``, ``title``, ``abstract``, ``text`` and the
    catch-all ``all`` field are indexed but not stored.

    Both public entry points close the writer when they are done, so an
    instance is good for a single ``index_all()`` or ``index_annotation()``
    call.
    """

    def __init__(self, annotList, writer):
        """Keep the annotations to index and the writer used to index them.

        :param annotList: iterable of annotation objects; each must expose
            ``external_id``, ``title``, ``description``, ``text`` and a
            ``get_tag_list()`` method.
        :param writer: object providing ``addDocument(doc)`` and ``close()``.
        """
        self.__annotList = annotList
        self.__writer = writer

    def index_all(self):
        """Index every annotation given to the constructor, then close the writer.

        The writer is closed exactly once, after the last document has been
        added (or if an error interrupts the loop). Closing per annotation
        would leave the writer unusable for every annotation after the first.
        """
        try:
            for annot in self.__annotList:
                self.__writer.addDocument(self._build_document(annot))
        finally:
            self.__writer.close()

    def index_annotation(self, annotation):
        """Index a single annotation and close the writer.

        :param annotation: annotation object (see ``__init__`` for the
            attributes that are read).
        """
        try:
            self.__writer.addDocument(self._build_document(annotation))
        finally:
            # Release the writer even when building/adding the document fails.
            self.__writer.close()

    def _build_document(self, annotation):
        """Return the Lucene document describing ``annotation``."""
        # Every tag is followed by ";" (including the last one); a missing or
        # empty tag list yields an empty string.
        tags = "".join(tag + ";" for tag in (annotation.get_tag_list() or ()))

        # Missing text fields are indexed as empty strings rather than making
        # the " ".join() below raise a TypeError.
        title = self._text_or_empty(annotation.title)
        description = self._text_or_empty(annotation.description)
        text = self._text_or_empty(annotation.text)

        store = lucene.Field.Store
        index = lucene.Field.Index

        doc = lucene.Document()
        # The identifier is the only stored field and is matched verbatim.
        doc.add(lucene.Field("annotation_id", annotation.external_id, store.YES, index.NOT_ANALYZED))
        searchable_fields = (
            ("type_doc", "text-annotation"),
            ("tags", tags),
            ("title", title),
            ("abstract", description),
            ("text", text),
            # Catch-all field concatenating everything searchable.
            ("all", " ".join([tags, title, description, text])),
        )
        for field_name, field_value in searchable_fields:
            doc.add(lucene.Field(field_name, field_value, store.NO, index.ANALYZED))
        return doc

    @staticmethod
    def _text_or_empty(value):
        """Return ``value``, or an empty string when it is ``None``."""
        return "" if value is None else value