"""Lucene indexing of text annotations."""

from django.conf import settings
from models import *
import lucene
from ldt.text import STORE
from ldt.text import ANALYZER
import lxml.etree


class AnnotIndexer(object):
    """Write text annotations into a Lucene index through a given writer.

    Each annotation becomes one Lucene document holding its external id,
    its tags, title, description and text, plus an ``all`` field that
    concatenates the searchable values.
    """

    def __init__(self, annotList, writer):
        """Remember what to index and which writer to use.

        :param annotList: iterable of annotation objects. Each one must
            provide ``external_id``, ``title``, ``description``, ``text``
            and a ``get_tag_list()`` method.
        :param writer: object exposing ``addDocument(doc)`` and ``close()``
            (presumably a ``lucene.IndexWriter`` -- confirm against callers).
        """
        self.__annotList = annotList
        self.__writer = writer

    def index_all(self):
        """Index every annotation of the list, then close the writer once.

        The writer used to be closed after *each* annotation, so any list
        holding more than one annotation failed on the second
        ``addDocument`` call. The writer is now closed a single time, after
        the last document (or after the failure, if one occurs once at
        least one document has been added).

        With an empty list nothing is added and the writer is left open,
        exactly as before.
        """
        added = False
        try:
            for annot in self.__annotList:
                self.__writer.addDocument(self._build_document(annot))
                added = True
        finally:
            # Only close a writer we actually wrote to, so the empty-list
            # case keeps its historical "writer untouched" behaviour.
            if added:
                self.__writer.close()

    def index_annotation(self, annotation):
        """Index a single annotation and close the writer.

        Closing here is kept for callers that index one annotation
        directly; the writer cannot be reused after this call.

        :param annotation: the annotation object to index.
        """
        self.__writer.addDocument(self._build_document(annotation))
        self.__writer.close()

    def _build_document(self, annotation):
        """Return the ``lucene.Document`` describing ``annotation``.

        Only ``annotation_id`` is stored (and left un-analyzed) so that it
        can be read back from search hits; every other field is analyzed
        for searching but not stored.
        """
        not_stored = lucene.Field.Store.NO
        analyzed = lucene.Field.Index.ANALYZED

        # Each tag is followed by ";" (the last one included). A missing
        # or empty tag list yields an empty string.
        tags = "".join(tag + ";" for tag in annotation.get_tag_list() or ())

        # A None value would make the field creation / the " ".join below
        # raise; index it as an empty string instead.
        title = annotation.title or ""
        description = annotation.description or ""
        text = annotation.text or ""

        doc = lucene.Document()
        doc.add(lucene.Field("annotation_id", annotation.external_id,
                             lucene.Field.Store.YES,
                             lucene.Field.Index.NOT_ANALYZED))
        doc.add(lucene.Field("type_doc", "text-annotation",
                             not_stored, analyzed))
        doc.add(lucene.Field("tags", tags, not_stored, analyzed))
        doc.add(lucene.Field("title", title, not_stored, analyzed))
        doc.add(lucene.Field("abstract", description, not_stored, analyzed))
        doc.add(lucene.Field("text", text, not_stored, analyzed))
        # Catch-all field used to search across every textual value at once.
        doc.add(lucene.Field("all",
                             " ".join([tags, title, description, text]),
                             not_stored, analyzed))
        return doc