web/ldt/text/annotindexer.py
changeset 24 9e19b7ae3780
parent 21 1a061f244254
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/web/ldt/text/annotindexer.py	Thu Dec 16 15:00:30 2010 +0100
@@ -0,0 +1,46 @@
+from django.conf import settings
+from models import *
+import lucene
+from ldt.text import STORE
+from ldt.text import ANALYZER
+import lxml.etree
+
+
+class AnnotIndexer(object):
+    """Index text annotations as Lucene documents through a caller-supplied writer."""
+
+    def __init__(self, annotList, writer):
+        """Keep the annotations to index and the writer (addDocument/close) to use."""
+        self.__annotList = annotList
+        self.__writer = writer
+
+    def index_all(self):
+        """Index every annotation, then close the writer once if any was added."""
+        indexed = False
+        for annot in self.__annotList:
+            self._add_document(annot)
+            indexed = True
+        # Close only after the loop: a closed Lucene writer rejects further adds.
+        if indexed:
+            self.__writer.close()
+
+    def index_annotation(self, annotation):
+        """Index a single annotation, then close the writer."""
+        self._add_document(annotation)
+        self.__writer.close()
+
+    def _add_document(self, annotation):
+        """Build the Lucene document for ``annotation`` and add it; never closes."""
+        store_no, analyzed = lucene.Field.Store.NO, lucene.Field.Index.ANALYZED
+        doc = lucene.Document()
+        doc.add(lucene.Field("annotation_id", annotation.external_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
+        # Each tag keeps its trailing ";" (e.g. "a;b;"); no tags gives "".
+        tags = "".join(tag + ";" for tag in (annotation.get_tag_list() or ()))
+        doc.add(lucene.Field("type_doc", "text-annotation", store_no, analyzed))
+        doc.add(lucene.Field("tags", tags, store_no, analyzed))
+        doc.add(lucene.Field("title", annotation.title, store_no, analyzed))
+        doc.add(lucene.Field("abstract", annotation.description, store_no, analyzed))
+        doc.add(lucene.Field("text", annotation.text, store_no, analyzed))
+        doc.add(lucene.Field("all", " ".join([tags, annotation.title, annotation.description, annotation.text]), store_no, analyzed))
+        self.__writer.addDocument(doc)
+        
\ No newline at end of file