web/ldt/text/annotindexer.py
changeset 21 1a061f244254
equal deleted inserted replaced
20:20c41a7e2173 21:1a061f244254
       
     1 from django.conf import settings
       
     2 from models import *
       
     3 import lucene
       
     4 from ldt.text import STORE
       
     5 from ldt.text import ANALYZER
       
     6 import lxml.etree
       
     7 
       
     8 
       
     9 class AnnotIndexer(object):
       
    10     
       
    11     def __init__(self, annotList, writer):
       
    12         self.__annotList = annotList
       
    13         self.__writer = writer
       
    14         
       
    15     
       
    16     def index_all(self):
       
    17         for annot in self.__annotList:
       
    18             self.index_annotation(annot)
       
    19     
       
    20     
       
    21     def index_annotation(self, annotation):
       
    22         
       
    23         doc = lucene.Document()
       
    24         
       
    25         doc.add(lucene.Field("annotation_id", annotation.external_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))              
       
    26         
       
    27         annottags = annotation.get_tag_list()
       
    28         tags = ""
       
    29         
       
    30         if annottags is None or len(annottags) == 0:
       
    31             tags = ""
       
    32         else:
       
    33             for tag in annottags:
       
    34                 tags += tag + ";" 
       
    35         
       
    36         doc.add(lucene.Field("type_doc", "text-annotation", lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))              
       
    37         doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
       
    38         doc.add(lucene.Field("title", annotation.title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
       
    39         doc.add(lucene.Field("abstract", annotation.description, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
       
    40         doc.add(lucene.Field("text", annotation.text, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
       
    41         doc.add(lucene.Field("all", " ".join([tags, annotation.title, annotation.description, annotation.text]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
       
    42 
       
    43         self.__writer.addDocument(doc)
       
    44             
       
    45         self.__writer.close()
       
    46