|
1 from django.conf import settings |
|
2 from models import * |
|
3 import lucene |
|
4 from ldt.text import STORE |
|
5 from ldt.text import ANALYZER |
|
6 import lxml.etree |
|
7 |
|
8 |
|
class AnnotIndexer(object):
    """Turn annotation objects into Lucene documents and hand them to a writer.

    Each annotation is indexed as one document with the fields
    ``annotation_id`` (stored, not analyzed) and ``type_doc``, ``tags``,
    ``title``, ``abstract``, ``text`` and ``all`` (analyzed, not stored).
    """

    def __init__(self, annotList, writer):
        """Remember the annotations to index and the writer receiving them.

        :param annotList: iterable of annotation objects; each must expose
            ``external_id``, ``title``, ``description``, ``text`` and
            ``get_tag_list()``.
        :param writer: object exposing ``addDocument(doc)`` and ``close()``
            (presumably a ``lucene.IndexWriter`` -- confirm against callers).
        """
        self.__annotList = annotList
        self.__writer = writer

    def index_all(self):
        """Index every annotation passed to the constructor, then close the writer.

        The writer is closed exactly once, after the last document has been
        added. Closing it after each document (as a plain loop over
        ``index_annotation`` would do) makes every ``addDocument`` call after
        the first one operate on an already-closed writer.
        """
        for annot in self.__annotList:
            self._add_document(annot)

        self.__writer.close()

    def index_annotation(self, annotation):
        """Index a single annotation and close the writer.

        :param annotation: annotation object (see ``__init__`` for the
            attributes that are read).
        """
        self._add_document(annotation)
        self.__writer.close()

    def _add_document(self, annotation):
        """Build the Lucene document for ``annotation`` and add it to the writer.

        Does not close the writer; the public entry points decide when to.
        """
        # Every tag is followed by ";" (including the last one); a missing or
        # empty tag list yields an empty string.
        annottags = annotation.get_tag_list()
        tags = "".join(tag + ";" for tag in annottags) if annottags else ""

        store = lucene.Field.Store
        index = lucene.Field.Index

        doc = lucene.Document()

        # The identifier is the only stored field; it is indexed verbatim.
        doc.add(lucene.Field("annotation_id", annotation.external_id, store.YES, index.NOT_ANALYZED))

        doc.add(lucene.Field("type_doc", "text-annotation", store.NO, index.ANALYZED))
        doc.add(lucene.Field("tags", tags, store.NO, index.ANALYZED))
        doc.add(lucene.Field("title", annotation.title, store.NO, index.ANALYZED))
        doc.add(lucene.Field("abstract", annotation.description, store.NO, index.ANALYZED))
        doc.add(lucene.Field("text", annotation.text, store.NO, index.ANALYZED))

        # Catch-all field: the other textual fields joined with single spaces.
        everything = " ".join([tags, annotation.title, annotation.description, annotation.text])
        doc.add(lucene.Field("all", everything, store.NO, index.ANALYZED))

        self.__writer.addDocument(doc)
|
46 |