|
21
|
1 |
from django.conf import settings |
|
|
2 |
from models import * |
|
|
3 |
import lucene |
|
|
4 |
from ldt.text import STORE |
|
|
5 |
from ldt.text import ANALYZER |
|
|
6 |
import lxml.etree |
|
|
7 |
|
|
|
8 |
|
|
|
9 |
class AnnotIndexer(object):
    """Turn annotation objects into Lucene documents and add them to a writer.

    Each annotation becomes one document with the fields ``annotation_id``
    (stored, not analyzed) and ``type_doc``, ``tags``, ``title``,
    ``abstract``, ``text`` and ``all`` (analyzed, not stored).

    Both public indexing methods close the writer when they are done, so an
    instance is meant for a single ``index_all()`` or ``index_annotation()``
    call.
    """

    def __init__(self, annotList, writer):
        """Remember what to index and where to write it.

        annotList -- iterable of annotation objects. Each one must expose
            ``external_id``, ``title``, ``description``, ``text`` and a
            ``get_tag_list()`` method.
        writer -- object exposing ``addDocument(doc)`` and ``close()``
            (presumably a ``lucene.IndexWriter``; confirm against callers).
        """
        self.__annotList = annotList
        self.__writer = writer

    def index_all(self):
        """Index every annotation passed to the constructor, then close the writer.

        The writer is closed exactly once, after the last document has been
        added. Closing it after each document (as happens when going through
        ``index_annotation``) would make every annotation after the first one
        hit an already-closed writer.

        ``close()`` is called even when building or adding a document raises,
        and also when the annotation list is empty.
        """
        try:
            for annot in self.__annotList:
                self.__writer.addDocument(self.__build_document(annot))
        finally:
            self.__writer.close()

    def index_annotation(self, annotation):
        """Index a single annotation, then close the writer.

        annotation -- annotation object (see ``__init__`` for the attributes
            it must provide).

        Because the writer is closed here, this method can only be used once
        per writer; use ``index_all()`` to index several annotations.
        """
        try:
            self.__writer.addDocument(self.__build_document(annotation))
        finally:
            self.__writer.close()

    def __build_document(self, annotation):
        """Return the ``lucene.Document`` describing ``annotation``.

        Does not touch the writer. ``None`` title, description or text is
        indexed as an empty string; a missing or empty tag list yields an
        empty ``tags`` field.
        """

        def text_or_empty(value):
            # " ".join() below rejects None, so missing text becomes "".
            return "" if value is None else value

        doc = lucene.Document()
        doc.add(lucene.Field("annotation_id", annotation.external_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))

        # Every tag is followed by ";", including the last one.
        tag_list = annotation.get_tag_list()
        tags = "".join(tag + ";" for tag in tag_list) if tag_list else ""

        title = text_or_empty(annotation.title)
        description = text_or_empty(annotation.description)
        text = text_or_empty(annotation.text)

        # Order matters only for reproducing the original field order.
        analyzed_fields = (
            ("type_doc", "text-annotation"),
            ("tags", tags),
            ("title", title),
            ("abstract", description),
            ("text", text),
            # Catch-all field concatenating everything searchable.
            ("all", " ".join([tags, title, description, text])),
        )
        for name, value in analyzed_fields:
            doc.add(lucene.Field(name, value, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

        return doc
|
|
46 |
|