src/ldt/ldt/ldt_utils/contentindexer.py
changeset 1117 3bab1e42acfa
parent 1074 36f657714851
child 1296 1a24fb79eb11
equal deleted inserted replaced
1100:b6d17223140f 1117:3bab1e42acfa
       
     1 from StringIO import StringIO
     1 from django.dispatch import receiver
     2 from django.dispatch import receiver
     2 from ldt import settings
     3 from ldt import settings
       
     4 from ldt.indexation import object_delete, object_insert
       
     5 from ldt.ldt_utils.events import post_project_save
     3 from ldt.ldt_utils.models import Segment, Content, Project
     6 from ldt.ldt_utils.models import Segment, Content, Project
     4 from ldt.ldt_utils.events import post_project_save
       
     5 from ldt.ldt_utils.stat import update_stat_project, add_annotation_to_stat
     7 from ldt.ldt_utils.stat import update_stat_project, add_annotation_to_stat
     6 from ldt.ldt_utils.utils import reduce_text_node
     8 from ldt.ldt_utils.utils import reduce_text_node
       
     9 from ldt.utils.url import request_with_auth
     7 from tagging import settings as tagging_settings
    10 from tagging import settings as tagging_settings
     8 import logging
    11 import logging
     9 import lxml.etree #@UnresolvedImport
    12 import lxml.etree #@UnresolvedImport
    10 import tagging.utils
    13 import tagging.utils
    11 from ldt.utils.url import request_with_auth
       
    12 from StringIO import StringIO
       
    13 
    14 
    14 logger = logging.getLogger(__name__)
    15 logger = logging.getLogger(__name__)
    15 
    16 
    16 def Property(func):
    17 def Property(func):
    17     return property(**func()) 
    18     return property(**func()) 
    18 
    19 
    19 
    20 
    20 class LdtIndexer(object):
    21 class LdtIndexer(object):
    21     
    22     
    22     def __init__(self, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
    23     def __init__(self, object_list, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
    23             self.__decoupage_blacklist = decoupage_blackList
    24         self.__object_list = object_list
       
    25         self.__decoupage_blacklist = decoupage_blackList
       
    26         self.__callback = callback
       
    27         self.__segment_cache = []
    24         
    28         
    25     @Property
    29     @Property
    26     def decoupage_blacklist(): #@NoSelf
    30     def decoupage_blacklist(): #@NoSelf
    27         doc = """get blacklist""" #@UnusedVariable
    31         doc = """get blacklist""" #@UnusedVariable
    28        
    32        
    38             del self.__decoupage_blacklist
    42             del self.__decoupage_blacklist
    39            
    43            
    40         return locals()
    44         return locals()
    41     
    45     
    42     def index_all(self):
    46     def index_all(self):
    43         raise NotImplemented
    47         for i,obj in enumerate(self.__object_list):
       
    48             if self.__callback:
       
    49                 self.__callback(i,obj)
       
    50             self.index_object(obj)
       
    51     
       
    52     
       
    53     def index_object(self, obj):
       
    54         
       
    55         self._do_index_object(obj)
       
    56         
       
    57         if self.__segment_cache:
       
    58             object_insert(Segment, self.__segment_cache, 'id_hash')
       
    59             self.__segment_cache = []
       
    60 
       
    61 
       
    62     def _do_index_object(self, obj):
       
    63         raise NotImplementedError()
    44     
    64     
    45     def index_ensemble(self, ensemble, content, project=None):
    65     def index_ensemble(self, ensemble, content, project=None):
    46         ensembleId = ensemble.get(u"id", None)
    66         ensembleId = ensemble.get(u"id", None)
    47         
    67         
    48         for decoupageNode in ensemble.getchildren():
    68         for decoupageNode in ensemble.getchildren():
    95                 audio_node = elementNode.xpath('audio')
   115                 audio_node = elementNode.xpath('audio')
    96                 if audio_node:
   116                 if audio_node:
    97                     audio_src = audio_node[0].get(u"source", u"")
   117                     audio_src = audio_node[0].get(u"source", u"")
    98                     audio_href = audio_node[0].text
   118                     audio_href = audio_node[0].text
    99 
   119 
   100                 seg = Segment(content=content,
   120                 seg = Segment.create(content=content,
   101                               iri_id=content.iri_id,
   121                               iri_id=content.iri_id,
   102                               ensemble_id=ensembleId,
   122                               ensemble_id=ensembleId,
   103                               cutting_id=decoupId,
   123                               cutting_id=decoupId,
   104                               element_id=elementId,
   124                               element_id=elementId,
   105                               tags=tags,
   125                               tags=tags,
   112                               project_obj=project,
   132                               project_obj=project,
   113                               project_id=ldt_id,
   133                               project_id=ldt_id,
   114                               audio_src=audio_src,
   134                               audio_src=audio_src,
   115                               audio_href=audio_href)
   135                               audio_href=audio_href)
   116                 seg.polemics = seg.get_polemic(polemics)
   136                 seg.polemics = seg.get_polemic(polemics)
   117                 seg.save()
   137                 if settings.LDT_INDEXATION_INSERT_BATCH_SIZE < 2:
       
   138                     seg.save()
       
   139                 else:
       
   140                     self.__segment_cache.append(seg)
       
   141                     if not (len(self.__segment_cache)%settings.LDT_INDEXATION_INSERT_BATCH_SIZE):
       
   142                         object_insert(Segment, self.__segment_cache)
       
   143                         self.__segment_cache = []
   118 
   144 
   119 
   145 
   120 class ContentIndexer(LdtIndexer):
   146 class ContentIndexer(LdtIndexer):
   121         
   147         
   122     def __init__(self, contentList, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
   148     def _do_index_object(self, obj):
   123         super(ContentIndexer, self).__init__(decoupage_blackList)
   149         
   124         self.__contentList = contentList
   150         content = obj 
   125         self.__callback = callback
       
   126                                    
       
   127     def index_all(self):
       
   128         for i,content in enumerate(self.__contentList):
       
   129             if self.__callback:
       
   130                 self.__callback(i,content)
       
   131             self.index_content(content)
       
   132             
       
   133     def index_content(self, content):        
       
   134         url = content.iri_url()
   151         url = content.iri_url()
   135         _, file_content = request_with_auth(url)
   152         _, file_content = request_with_auth(url)
   136         doc = lxml.etree.parse(StringIO(file_content)) #@UndefinedVariable
   153         doc = lxml.etree.parse(StringIO(file_content)) #@UndefinedVariable
   137        
   154        
   138         Segment.objects.filter(iri_id=content.iri_id).delete() #@UndefinedVariable
   155         object_delete(Segment, iri_id=content.iri_id, project_id='')
   139         
   156         
   140         res = doc.xpath("/iri/body/ensembles/ensemble")
   157         res = doc.xpath("/iri/body/ensembles/ensemble")
   141 
   158 
   142         for ensemble in res:                
   159         for ensemble in res:                
   143             self.index_ensemble(ensemble, content)
   160             self.index_ensemble(ensemble, content)
   144                     
   161                             
   145             
   162             
   146 class ProjectIndexer(LdtIndexer):
   163 class ProjectIndexer(LdtIndexer):
   147 
       
   148     def __init__(self, projectList, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
       
   149         super(ProjectIndexer, self).__init__(decoupage_blackList)                
       
   150         self.__projectList = projectList
       
   151         self.__callback = callback
       
   152                               
   164                               
   153     def index_all(self):
   165     def _do_index_object(self, obj):
   154         for i,project in enumerate(self.__projectList):
   166 
   155             if self.__callback:
   167         project = obj
   156                 self.__callback(i,project)
       
   157 
       
   158             self.index_project(project)
       
   159 
       
   160     def index_project(self, project):
       
   161 
       
   162         # pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
   168         # pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
   163         doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable
   169         doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable
   164 
   170 
   165         Segment.objects.filter(project_obj__ldt_id=project.ldt_id).delete() #@UndefinedVariable
   171         object_delete(Segment, project_obj__ldt_id=project.ldt_id)
   166        
   172        
   167         res = doc.xpath("/iri/annotations/content")
   173         res = doc.xpath("/iri/annotations/content")
   168 
   174 
   169         for content in res:
   175         for content in res:
   170             contentId = content.get(u"id", None)
   176             contentId = content.get(u"id", None)
   181 def index_project(**kwargs):
   187 def index_project(**kwargs):
   182     must_reindex = kwargs.get("must_reindex", True)
   188     must_reindex = kwargs.get("must_reindex", True)
   183     if must_reindex and settings.AUTO_INDEX_AFTER_SAVE:
   189     if must_reindex and settings.AUTO_INDEX_AFTER_SAVE:
   184         instance = kwargs['instance']
   190         instance = kwargs['instance']
   185         if instance.state != Project.PUBLISHED:
   191         if instance.state != Project.PUBLISHED:
   186             Segment.objects.filter(project_obj__ldt_id=instance.ldt_id).delete() #@UndefinedVariable
   192             object_delete(Segment, project_obj__ldt_id=instance.ldt_id)
   187             update_stat_project(instance)
   193             update_stat_project(instance)
   188         else:
   194         else:
   189             projectIndexer = ProjectIndexer([instance])
   195             projectIndexer = ProjectIndexer([instance])
   190             projectIndexer.index_all()
   196             projectIndexer.index_all()
   191             update_stat_project(instance)
   197             update_stat_project(instance)
   208     date_str = params.get("date", "")
   214     date_str = params.get("date", "")
   209     audio_src = params.get("audio_src", "")
   215     audio_src = params.get("audio_src", "")
   210     audio_href = params.get("audio_href", "")
   216     audio_href = params.get("audio_href", "")
   211     polemics = params.get("polemics", "")
   217     polemics = params.get("polemics", "")
   212     
   218     
   213     seg = Segment(content=content,
   219     seg = Segment.create(content=content,
   214               iri_id=content.iri_id if content is not None else "",
   220               iri_id=content.iri_id if content is not None else "",
   215               ensemble_id=ensemble_id,
   221               ensemble_id=ensemble_id,
   216               cutting_id=cutting_id,
   222               cutting_id=cutting_id,
   217               element_id=element_id,
   223               element_id=element_id,
   218               tags=tags_str,
   224               tags=tags_str,