src/ldt/ldt/ldt_utils/contentindexer.py
author verrierj
Fri, 30 Sep 2011 12:19:21 +0200
changeset 191 43509690a898
parent 176 a88714473302
child 349 63f729155d81
permissions -rw-r--r--
Merge with 08e6b5dbfc933ea06390df55f948cf05b6d02584
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     1
from django.conf import settings
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     2
from django.db.models.signals import post_save
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     3
from django.dispatch import receiver
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     4
from ldt.ldt_utils.models import Segment, Content, Project
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     5
from ldt.ldt_utils.utils import reduce_text_node
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     6
import ldt.indexation
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     7
import lucene
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     8
import lxml.etree
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     9
import urllib #@UnresolvedImport
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    10
# import ldt.utils.log
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    11
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    12
def Property(func):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    13
    return property(**func()) 
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    14
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    15
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    16
class LdtIndexer(object):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    17
    
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    18
    def __init__(self, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    19
            self.__decoupage_blacklist = decoupage_blackList
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    20
            self.__writer = writer
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    21
        
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    22
    @Property
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    23
    def decoupage_blacklist(): #@NoSelf
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    24
        doc = """get blacklist""" #@UnusedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    25
       
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    26
        def fget(self):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    27
            if self.__decoupage_blacklist is None:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    28
                self.__decoupage_blacklist = ()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    29
            return self.__decoupage_blacklist
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    30
           
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    31
        def fset(self, value):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    32
            self.__decoupage_blacklist = value
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    33
           
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    34
        def fdel(self):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    35
            del self.__decoupage_blacklist
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    36
           
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    37
        return locals()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    38
    
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    39
    @Property
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    40
    def writer(): #@NoSelf
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    41
        def fget(self):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    42
            return self.__writer
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    43
        return locals()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    44
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    45
    def index_all(self):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    46
        raise NotImplemented
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    47
    
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    48
    def index_ensemble(self, ensemble, content, project=None):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    49
        ensembleId = ensemble.get(u"id", None)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    50
        
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    51
        for decoupageNode in ensemble.getchildren():
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    52
            if decoupageNode.tag != "decoupage"  or decoupageNode.get(u"id", None) in self.decoupage_blacklist:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    53
                continue
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    54
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    55
            decoupId = decoupageNode.get(u"id", None)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    56
            res = decoupageNode.xpath("elements/element")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    57
            for elementNode in res:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    58
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    59
                elementId = elementNode.get(u"id", None)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    60
                tags = elementNode.get(u"tags", None)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    61
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    62
                if tags is not None:                            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    63
                    tags.replace(u",", u";")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    64
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    65
                if tags is None or len(tags) == 0:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    66
                    tags = u""
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    67
                    restagnode = elementNode.xpath("tag/text()", smart_strings=False)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    68
                    for tagnode in restagnode:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    69
                        tags = tags + u" ; " + tagnode
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    70
                        
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    71
                if tags is None or len(tags) == 0:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    72
                    tags = u""
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    73
                    restagnode = elementNode.xpath("tags/tag/text()", smart_strings=False)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    74
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    75
                    for tagnode in restagnode:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    76
                        tags = tags + u" ; " + tagnode
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    77
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    78
                if tags is None:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    79
                    tags = u""
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    80
                tags = u";".join([tag[0:50] for tag in tags.split(u";")])
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    81
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    82
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    83
                title = reduce_text_node(elementNode, "title/text()")                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    84
                abstract = reduce_text_node(elementNode, "abstract/text()")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    85
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    86
                author = elementNode.get("author", "")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    87
                start_ts = int(float(elementNode.get("begin", "-1")))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    88
                duration = int(float(elementNode.get("dur", "0")))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    89
                date_str = elementNode.get("date", "")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    90
                ldt_id = u""
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    91
                if project:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    92
                    ldt_id = project.ldt_id
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    93
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    94
                doc = lucene.Document()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    95
                doc.add(lucene.Field("type_doc", "annotation", lucene.Field.Store.NO, lucene.Field.Index.NOT_ANALYZED))        
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    96
                doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    97
                doc.add(lucene.Field("project_id", ldt_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    98
                doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    99
                doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   100
                doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))                        
176
a88714473302 Added a page to display search results
verrierj
parents: 111
diff changeset
   101
                doc.add(lucene.Field("tags", tags, lucene.Field.Store.YES, lucene.Field.Index.ANALYZED))
a88714473302 Added a page to display search results
verrierj
parents: 111
diff changeset
   102
                doc.add(lucene.Field("title", title, lucene.Field.Store.YES, lucene.Field.Index.ANALYZED))
a88714473302 Added a page to display search results
verrierj
parents: 111
diff changeset
   103
                doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.YES, lucene.Field.Index.ANALYZED))
a88714473302 Added a page to display search results
verrierj
parents: 111
diff changeset
   104
                doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.YES, lucene.Field.Index.ANALYZED))
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   105
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   106
                seg = Segment(content=content,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   107
                              iri_id=content.iri_id,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   108
                              ensemble_id=ensembleId,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   109
                              cutting_id=decoupId,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   110
                              element_id=elementId,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   111
                              tags=tags,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   112
                              title=title,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   113
                              abstract=abstract,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   114
                              duration=duration,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   115
                              author=author,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   116
                              start_ts=start_ts,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   117
                              date=date_str,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   118
                              project_obj=project,
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   119
                              project_id=ldt_id)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   120
                seg.save()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   121
                self.writer.addDocument(doc)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   122
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   123
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   124
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   125
class ContentIndexer(LdtIndexer):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   126
        
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   127
        def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   128
            super(ContentIndexer, self).__init__(writer, decoupage_blackList)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   129
            self.__contentList = contentList
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   130
                                       
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   131
        def index_all(self):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   132
            for content in self.__contentList:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   133
                self.index_content(content)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   134
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   135
        def index_content(self, content):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   136
            url = content.iri_url()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   137
            filepath = urllib.urlopen(url)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   138
            doc = lxml.etree.parse(filepath) #@UndefinedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   139
           
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   140
            self.writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   141
            Segment.objects.filter(iri_id=content.iri_id).delete() #@UndefinedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   142
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   143
            res = doc.xpath("/iri/body/ensembles/ensemble")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   144
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   145
            for ensemble in res:                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   146
                self.index_ensemble(ensemble, content)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   147
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   148
            self.writer.commit()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   149
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   150
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   151
class ProjectIndexer(LdtIndexer):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   152
        
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   153
        def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   154
            super(ProjectIndexer, self).__init__(writer, decoupage_blackList)                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   155
            self.__projectList = projectList
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   156
                                   
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   157
        def index_all(self):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   158
            for project in self.__projectList:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   159
                self.index_project(project)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   160
 
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   161
        def index_project(self, project):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   162
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   163
            # pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   164
            doc = lxml.etree.fromstring(project.ldt) #@UndefinedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   165
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   166
            self.writer.deleteDocuments(lucene.Term("project_id", project.ldt_id))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   167
            Segment.objects.filter(project_obj__ldt_id=project.ldt_id).delete() #@UndefinedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   168
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   169
            res = doc.xpath("/iri/annotations/content")
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   170
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   171
            for content in res:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   172
                contentId = content.get(u"id", None)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   173
                content_obj = None
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   174
                
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   175
                clist = Content.objects.filter(iri_id = contentId) #@UndefinedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   176
                if len(clist) > 0:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   177
                    content_obj = clist[0]
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   178
 
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   179
                for ensemble in content.getchildren():
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   180
                    self.index_ensemble(ensemble, content_obj, project)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   181
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   182
            self.writer.commit()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   183
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   184
@receiver(post_save, sender=Project)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   185
def index_project(sender, **kwargs):
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   186
    if settings.AUTO_INDEX_AFTER_SAVE:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   187
        instance = kwargs['instance']
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   188
        writer = ldt.indexation.get_writer()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   189
        try:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   190
            if instance.state != 2:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   191
                writer.deleteDocuments(lucene.Term("project_id", instance.ldt_id))
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   192
                Segment.objects.filter(project_obj__ldt_id=instance.ldt_id).delete() #@UndefinedVariable
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   193
            else:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   194
                projectIndexer = ProjectIndexer([instance], writer)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   195
                projectIndexer.index_all()
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   196
        finally:
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   197
            writer.close()