src/ldt/ldt/text/annotindexer.py
author ymh <ymh.work@gmail.com>
Fri, 20 Jul 2012 12:40:08 +0200
changeset 716 31dc2726ca51
parent 111 4535dafa6007
permissions -rw-r--r--
centralise les appel à lucene
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
716
31dc2726ca51 centralise les appel à lucene
ymh <ymh.work@gmail.com>
parents: 111
diff changeset
     1
import ldt.indexation
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 77
diff changeset
     2
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 77
diff changeset
     3
class AnnotIndexer(object):
    """Index a collection of text annotations through ``ldt.indexation``.

    Each annotation is turned into a list of field tuples and handed to
    ``ldt.indexation.add_document`` together with the writer supplied at
    construction time.
    """

    def __init__(self, annotList, writer):
        """Store the annotations to index and the writer to index them with.

        :param annotList: iterable of annotation objects. Each one must
            provide ``get_tag_list()`` and the attributes ``external_id``,
            ``title``, ``description`` and ``text``.
        :param writer: object passed unchanged as second argument to
            ``ldt.indexation.add_document``.
        """
        self.__annotList = annotList
        self.__writer = writer

    def index_all(self):
        """Index every annotation given to the constructor, in order."""
        for annot in self.__annotList:
            self.index_annotation(annot)

    def index_annotation(self, annotation):
        """Index a single annotation with the writer given at construction.

        :param annotation: annotation object (see ``__init__`` for the
            attributes that are read).
        """
        ldt.indexation.add_document(
            self._build_fields(annotation), self.__writer)

    def _build_fields(self, annotation):
        """Return the list of field tuples describing ``annotation``.

        Every tuple is ``(name, value, flag, mode)``.
        NOTE(review): the third and fourth items are presumably the
        "stored" flag and the analysis mode expected by
        ``ldt.indexation.add_document`` -- confirm against that function.
        """
        annottags = annotation.get_tag_list()
        # Each tag is terminated (not merely separated) by ";".  A missing
        # or empty tag list yields the empty string.
        tags = "".join(tag + ";" for tag in annottags) if annottags else ""

        # The catch-all field concatenates every searchable text.  A part
        # that is None is replaced by "" so that one unset attribute does
        # not make the join raise TypeError and abort the indexing.
        everything = " ".join(
            "" if part is None else part
            for part in (tags, annotation.title,
                         annotation.description, annotation.text)
        )

        return [
            ("annotation_id", annotation.external_id, True, "NOT_ANALYZED"),
            ("type_doc", "text-annotation", False, "NOT_ANALYZED"),
            ("tags", tags, False, "ANALYZED"),
            ("title", annotation.title, False, "ANALYZED"),
            ("abstract", annotation.description, False, "ANALYZED"),
            ("text", annotation.text, False, "ANALYZED"),
            ("all", everything, False, "ANALYZED"),
        ]
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 77
diff changeset
    36