src/ldt/ldt/ldt_utils/contentindexer.py
author cavaliet
Wed, 09 Oct 2013 10:14:55 +0200
changeset 1240 a9d953866a50
parent 1117 3bab1e42acfa
child 1296 1a24fb79eb11
permissions -rw-r--r--
Added tag V01.51.05 for changeset 150a76f0f05c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
     1
from StringIO import StringIO
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     2
from django.dispatch import receiver
863
3eae57bb42b3 correct ref to tagging settings
ymh <ymh.work@gmail.com>
parents: 852
diff changeset
     3
from ldt import settings
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
     4
from ldt.indexation import object_delete, object_insert
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
     5
from ldt.ldt_utils.events import post_project_save
1074
36f657714851 optimize adding an annotation
ymh <ymh.work@gmail.com>
parents: 1072
diff changeset
     6
from ldt.ldt_utils.models import Segment, Content, Project
36f657714851 optimize adding an annotation
ymh <ymh.work@gmail.com>
parents: 1072
diff changeset
     7
from ldt.ldt_utils.stat import update_stat_project, add_annotation_to_stat
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
     8
from ldt.ldt_utils.utils import reduce_text_node
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
     9
from ldt.utils.url import request_with_auth
863
3eae57bb42b3 correct ref to tagging settings
ymh <ymh.work@gmail.com>
parents: 852
diff changeset
    10
from tagging import settings as tagging_settings
3eae57bb42b3 correct ref to tagging settings
ymh <ymh.work@gmail.com>
parents: 852
diff changeset
    11
import logging
3eae57bb42b3 correct ref to tagging settings
ymh <ymh.work@gmail.com>
parents: 852
diff changeset
    12
import lxml.etree #@UnresolvedImport
852
393bcc75d26a solve pb with logging and tags
ymh <ymh.work@gmail.com>
parents: 810
diff changeset
    13
import tagging.utils
393bcc75d26a solve pb with logging and tags
ymh <ymh.work@gmail.com>
parents: 810
diff changeset
    14
393bcc75d26a solve pb with logging and tags
ymh <ymh.work@gmail.com>
parents: 810
diff changeset
    15
logger = logging.getLogger(__name__)
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    16
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    17
def Property(func):
    """Build a ``property`` from the keyword arguments returned by *func*.

    *func* is called without arguments and must return a mapping whose keys
    are accepted by the builtin ``property`` (``fget``, ``fset``, ``fdel``,
    ``doc``); typically it simply returns ``locals()``.
    """
    accessors = func()
    return property(**accessors)
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    19
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    20
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
    21
class LdtIndexer(object):
    """Base indexer turning the annotations of a list of objects into ``Segment`` objects.

    Subclasses implement :meth:`_do_index_object`; the subclasses in this
    module call :meth:`index_ensemble` from it for every ``ensemble`` XML node
    found in the indexed object.
    """

    def __init__(self, object_list, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
        """
        :param object_list: iterable of the objects that ``index_all`` indexes.
        :param decoupage_blackList: ids of the ``decoupage`` nodes to ignore;
            ``None`` is treated as an empty blacklist.
        :param callback: optional callable, invoked as ``callback(i, obj)``
            by ``index_all`` just before the i-th object is indexed.
        """
        self.__object_list = object_list
        self.__decoupage_blacklist = decoupage_blackList
        self.__callback = callback
        # segments waiting for the next bulk insert (only used when batching is on)
        self.__segment_cache = []

    @property
    def decoupage_blacklist(self):
        """Ids of the ``decoupage`` nodes skipped by ``index_ensemble``."""
        if self.__decoupage_blacklist is None:
            # normalise so that callers can always use the ``in`` operator
            self.__decoupage_blacklist = ()
        return self.__decoupage_blacklist

    @decoupage_blacklist.setter
    def decoupage_blacklist(self, value):
        self.__decoupage_blacklist = value

    @decoupage_blacklist.deleter
    def decoupage_blacklist(self):
        del self.__decoupage_blacklist

    def index_all(self):
        """Index every object of the list, notifying the callback before each one."""
        for i, obj in enumerate(self.__object_list):
            if self.__callback:
                self.__callback(i, obj)
            self.index_object(obj)

    def index_object(self, obj):
        """Index one object, then insert the segments still waiting in the cache."""
        self._do_index_object(obj)
        self._flush_segment_cache()

    def _do_index_object(self, obj):
        """Index *obj*; must be implemented by subclasses."""
        raise NotImplementedError()

    def _flush_segment_cache(self):
        """Bulk-insert the cached segments, if any, and empty the cache."""
        if self.__segment_cache:
            # every bulk insert of this class goes through this single call so
            # that the same 'id_hash' key argument is always given
            object_insert(Segment, self.__segment_cache, 'id_hash')
            self.__segment_cache = []

    def _store_segment(self, seg):
        """Save *seg* immediately, or queue it for a bulk insert when batching is enabled."""
        batch_size = settings.LDT_INDEXATION_INSERT_BATCH_SIZE
        if batch_size < 2:
            seg.save()
            return
        self.__segment_cache.append(seg)
        if len(self.__segment_cache) % batch_size == 0:
            self._flush_segment_cache()

    def _extract_tags(self, element_node):
        """Return the normalised, comma separated tag string of an ``element`` node.

        The ``tags`` attribute is used when it is not empty; otherwise the text
        of the ``tag`` children, then of the ``tags/tag`` descendants, is used.
        """
        tags = element_node.get(u"tags", None)
        for tag_path in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            # each tag is prefixed by a comma, as the original concatenation did,
            # so that the result always reads as a comma delimited list
            tag_texts = element_node.xpath(tag_path, smart_strings=False)
            tags = u"".join(u"," + tag_text for tag_text in tag_texts)

        tags_list = [tag[:tagging_settings.MAX_TAG_LENGTH] for tag in tagging.utils.parse_tag_input(tags)]
        tags = u",".join(tags_list)
        if u"," not in tags:
            # zero or one tag: keep a leading comma in the stored value
            tags = u"," + tags
        return tags

    def index_ensemble(self, ensemble, content, project=None):
        """Create a ``Segment`` for every element of the non-blacklisted decoupages of *ensemble*.

        :param ensemble: lxml node whose ``decoupage`` children are indexed.
        :param content: object the segments belong to; its ``iri_id`` is read.
        :param project: optional project; when given, its ``ldt_id`` is stored
            as the segment ``project_id`` (an empty string is stored otherwise).
        """
        ensemble_id = ensemble.get(u"id", None)
        ldt_id = project.ldt_id if project else u""

        for decoupage_node in ensemble.getchildren():
            if decoupage_node.tag != "decoupage":
                continue
            decoup_id = decoupage_node.get(u"id", None)
            if decoup_id in self.decoupage_blacklist:
                continue

            for element_node in decoupage_node.xpath("elements/element"):
                polemics = element_node.xpath('meta/polemics/polemic/text()')

                # audio annotation management
                audio_src = u""
                audio_href = u""
                audio_nodes = element_node.xpath('audio')
                if audio_nodes:
                    audio_src = audio_nodes[0].get(u"source", u"")
                    audio_href = audio_nodes[0].text

                seg = Segment.create(content=content,
                              iri_id=content.iri_id,
                              ensemble_id=ensemble_id,
                              cutting_id=decoup_id,
                              element_id=element_node.get(u"id", None),
                              tags=self._extract_tags(element_node),
                              title=reduce_text_node(element_node, "title/text()"),
                              abstract=reduce_text_node(element_node, "abstract/text()"),
                              duration=int(float(element_node.get("dur", "0"))),
                              author=element_node.get("author", ""),
                              start_ts=int(float(element_node.get("begin", "-1"))),
                              date=element_node.get("date", ""),
                              project_obj=project,
                              project_id=ldt_id,
                              audio_src=audio_src,
                              audio_href=audio_href)
                seg.polemics = seg.get_polemic(polemics)
                self._store_segment(seg)
716
31dc2726ca51 centralise les appel à lucene
ymh <ymh.work@gmail.com>
parents: 628
diff changeset
   144
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   145
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   146
class ContentIndexer(LdtIndexer):
    """Indexer for content objects: indexes the ensembles of their iri document."""

    def _do_index_object(self, obj):
        """Re-index the content *obj*.

        The document found at ``obj.iri_url()`` is fetched with
        ``request_with_auth`` and parsed, the segments having this ``iri_id``
        and an empty ``project_id`` are deleted, then every
        ``/iri/body/ensembles/ensemble`` node is indexed.
        """
        _, raw_iri = request_with_auth(obj.iri_url())
        tree = lxml.etree.parse(StringIO(raw_iri)) #@UndefinedVariable

        # drop the segments previously indexed for this content alone
        object_delete(Segment, iri_id=obj.iri_id, project_id='')

        for ensemble_node in tree.xpath("/iri/body/ensembles/ensemble"):
            self.index_ensemble(ensemble_node, obj)
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 1074
diff changeset
   161
                            
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   162
            
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   163
class ProjectIndexer(LdtIndexer):
    """Indexer for project objects: indexes the annotations stored in their ldt document."""

    def _do_index_object(self, obj):
        """Re-index the project *obj*.

        ``obj.ldt_encoded`` is parsed, the segments linked to the project
        ``ldt_id`` are deleted, then for every ``/iri/annotations/content``
        node the matching ``Content`` is looked up by ``iri_id`` and all the
        children of the node are indexed for it.

        Content nodes for which no ``Content`` exists are skipped with a
        warning: ``index_ensemble`` reads ``content.iri_id`` and would raise an
        ``AttributeError`` on ``None``.
        """
        project = obj
        doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable

        object_delete(Segment, project_obj__ldt_id=project.ldt_id)

        for content_node in doc.xpath("/iri/annotations/content"):
            content_id = content_node.get(u"id", None)

            # only the first match is used, so do not fetch more than one row
            matches = list(Content.objects.filter(iri_id=content_id)[:1]) #@UndefinedVariable
            if not matches:
                logger.warning("No content found for iri_id %r while indexing project %r : annotations skipped",
                               content_id, project.ldt_id)
                continue
            content_obj = matches[0]

            for ensemble in content_node.getchildren():
                self.index_ensemble(ensemble, content_obj, project)
111
4535dafa6007 improve releasing of resources when indexing + convert line endings to unix
ymh <ymh.work@gmail.com>
parents: 103
diff changeset
   185
1072
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   186
@receiver(post_project_save)
def index_project(**kwargs):
    """Signal handler run after a project save: refresh its segments and statistics.

    Nothing is done when the ``must_reindex`` keyword is false or when
    ``settings.AUTO_INDEX_AFTER_SAVE`` is false. Otherwise a published project
    (``kwargs['instance']``) is re-indexed, the segments of a project in any
    other state are deleted, and the project statistics are updated in both
    cases.
    """
    if not kwargs.get("must_reindex", True) or not settings.AUTO_INDEX_AFTER_SAVE:
        return

    project = kwargs['instance']
    if project.state == Project.PUBLISHED:
        ProjectIndexer([project]).index_all()
    else:
        # a project that is not published must not stay in the index
        object_delete(Segment, project_obj__ldt_id=project.ldt_id)
    update_stat_project(project)
1072
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   198
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   199
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   200
1074
36f657714851 optimize adding an annotation
ymh <ymh.work@gmail.com>
parents: 1072
diff changeset
   201
def add_segment(params):
    """Create and save a Segment from ``params``, then update content stats.

    ``params`` is read through ``.get`` only. The keys looked up are:

    - ``project`` and ``content``: related objects, default ``None``.
    - ``ensemble_id``, ``cutting_id``, ``element_id``, ``title``,
      ``abstract``, ``tags``, ``author``, ``date``, ``audio_src``,
      ``audio_href``, ``polemics``: default ``""``.
    - ``start_ts`` and ``duration``: default ``0``.

    The segment's ``iri_id`` is taken from ``content.iri_id`` and its
    ``project_id`` from ``project.ldt_id``; each falls back to ``""`` when
    the corresponding object is ``None``.
    """
    content = params.get("content", None)
    project = params.get("project", None)

    # Every keyword passed to Segment.create that comes straight from params.
    segment_fields = {
        "content": content,
        "project_obj": project,
        "ensemble_id": params.get("ensemble_id", ""),
        "cutting_id": params.get("cutting_id", ""),
        "element_id": params.get("element_id", ""),
        "title": params.get("title", ""),
        "abstract": params.get("abstract", ""),
        "tags": params.get("tags", ""),
        "start_ts": params.get("start_ts", 0),
        "duration": params.get("duration", 0),
        "author": params.get("author", ""),
        "date": params.get("date", ""),
        "audio_src": params.get("audio_src", ""),
        "audio_href": params.get("audio_href", ""),
    }
    raw_polemics = params.get("polemics", "")

    # Identifiers copied from the related objects, or "" when they are absent.
    segment_fields["iri_id"] = content.iri_id if content is not None else ""
    segment_fields["project_id"] = project.ldt_id if project is not None else ""

    new_segment = Segment.create(**segment_fields)
    # get_polemic converts the raw value before it is stored on the segment.
    # NOTE(review): the conversion itself lives on Segment -- not visible here.
    new_segment.polemics = new_segment.get_polemic(raw_polemics)
    new_segment.save()

    stat_content = new_segment.content
    begin = new_segment.start_ts
    add_annotation_to_stat(stat_content, begin, begin + new_segment.duration)
1072
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   238
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   239
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   240
def delete_segment(project, project_id, iri_id, ensemble_id, cutting_id, element_id):
    """Delete every Segment matching the given identifiers and update stats.

    Segments are selected by exact match on ``project_id``, ``iri_id``,
    ``ensemble_id``, ``cutting_id`` and ``element_id``. Each match is deleted
    individually, then ``add_annotation_to_stat`` is called with the
    segment's content, its ``start_ts`` and ``start_ts + duration``.

    ``project`` is accepted but not used by this function.
    NOTE(review): presumably kept for caller compatibility -- confirm.
    """
    lookup = {
        "project_id": project_id,
        "iri_id": iri_id,
        "ensemble_id": ensemble_id,
        "cutting_id": cutting_id,
        "element_id": element_id,
    }

    for segment in Segment.objects.filter(**lookup):
        segment.delete()
        # The in-memory instance is still read after the delete call.
        # NOTE(review): the helper is named "add_..." although a segment was
        # just removed -- presumably it recomputes the stat; verify.
        stat_content = segment.content
        begin = segment.start_ts
        add_annotation_to_stat(stat_content, begin, begin + segment.duration)
1072
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   246
    
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   247
    
687dabdd25a7 Add an argument to project save to avoid indexation
ymh <ymh.work@gmail.com>
parents: 1046
diff changeset
   248