src/ldt/ldt/ldt_utils/contentindexer.py
author cavaliet
Fri, 04 Jan 2013 17:13:54 +0100
changeset 1046 643a0f1991c0
parent 922 cba34a867804
child 1072 687dabdd25a7
permissions -rw-r--r--
correct tagging for segment.

from django.db.models.signals import post_save
from django.dispatch import receiver
from ldt import settings
from ldt.ldt_utils.models import Segment, Content, Project
from ldt.ldt_utils.stat import update_stat_project
from ldt.ldt_utils.utils import reduce_text_node
from tagging import settings as tagging_settings
import logging
import lxml.etree #@UnresolvedImport
import tagging.utils
from ldt.utils.url import request_with_auth
from StringIO import StringIO

logger = logging.getLogger(__name__)

def Property(func):
    """Build a ``property`` from the accessor mapping returned by ``func``.

    ``func`` is called once, without arguments, and must return a mapping
    whose keys are keyword arguments accepted by ``property`` (``fget``,
    ``fset``, ``fdel``, ``doc``).
    """
    accessors = func()
    return property(**accessors)


class LdtIndexer(object):
    """Base indexer converting ``ensemble`` XML nodes into saved ``Segment`` objects.

    ``index_ensemble`` holds the element-to-Segment conversion; ``index_all``
    is left to subclasses, which decide what documents are walked.
    """

    def __init__(self, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Store the ids of the decoupages that must be skipped.

        decoupage_blackList -- container of decoupage ids to ignore. ``None``
            is accepted and read back as an empty tuple through the
            ``decoupage_blacklist`` property.
        """
        self.__decoupage_blacklist = decoupage_blackList

    @Property
    def decoupage_blacklist(): #@NoSelf
        # Property() passes the locals() returned below straight to
        # property(), so the only names defined here must be doc, fget, fset
        # and fdel.
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # A None blacklist is replaced by an empty tuple on first read so
            # that membership tests always work.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    def index_all(self):
        """Index everything handled by this indexer; subclasses must override.

        Raises NotImplementedError when called on a class that does not
        override it.
        """
        # NotImplemented is a comparison sentinel, not an exception: raising
        # it fails with a TypeError instead of signalling a missing override.
        raise NotImplementedError

    def _element_tags(self, elementNode):
        """Return the comma separated tag string stored for ``elementNode``.

        The ``tags`` attribute is used when it is non-empty; otherwise the
        text of the ``tag`` children, then of the ``tags/tag`` descendants, is
        used. Each parsed tag is truncated to
        ``tagging_settings.MAX_TAG_LENGTH`` characters.
        """
        tags = elementNode.get(u"tags", None)
        for fallback_path in ("tag/text()", "tags/tag/text()"):
            if tags:
                break
            # Yields u"" when the path matches nothing, so tags is always a
            # string once the loop is over.
            tags = u"".join(u"," + tagnode
                            for tagnode in elementNode.xpath(fallback_path, smart_strings=False))

        tags_list = [tag[:tagging_settings.MAX_TAG_LENGTH] for tag in tagging.utils.parse_tag_input(tags)]
        tags = u",".join(tags_list)
        # The stored value always contains at least one comma.
        # NOTE(review): presumably so that a later parse treats the value as
        # comma delimited -- confirm against tagging.utils.parse_tag_input.
        if u"," not in tags:
            tags = u"," + tags
        return tags

    def index_ensemble(self, ensemble, content, project=None):
        """Create and save one ``Segment`` per element of each indexable decoupage.

        Children of ``ensemble`` that are not ``decoupage`` nodes, or whose id
        is in ``decoupage_blacklist``, are ignored.

        ensemble -- lxml element whose children are walked.
        content -- object attached to every Segment; its ``iri_id`` attribute
            is read, so it must not be None.
        project -- optional project attached to every Segment; when truthy its
            ``ldt_id`` is stored as the Segment ``project_id``, otherwise an
            empty string is stored.

        Raises ValueError when an element's ``begin`` or ``dur`` attribute is
        not a number.
        """
        ensembleId = ensemble.get(u"id", None)
        blacklist = self.decoupage_blacklist
        ldt_id = project.ldt_id if project else u""

        for decoupageNode in ensemble:
            if decoupageNode.tag != "decoupage":
                continue

            decoupId = decoupageNode.get(u"id", None)
            if decoupId in blacklist:
                continue

            for elementNode in decoupageNode.xpath("elements/element"):

                elementId = elementNode.get(u"id", None)
                tags = self._element_tags(elementNode)

                title = reduce_text_node(elementNode, "title/text()")
                abstract = reduce_text_node(elementNode, "abstract/text()")
                polemics = elementNode.xpath('meta/polemics/polemic/text()')

                author = elementNode.get("author", "")
                # The attributes may hold decimal values, hence the float()
                # step before truncating to int.
                start_ts = int(float(elementNode.get("begin", "-1")))
                duration = int(float(elementNode.get("dur", "0")))
                date_str = elementNode.get("date", "")

                # audio annotation management
                audio_src = u""
                audio_href = u""
                audio_node = elementNode.xpath('audio')
                if audio_node:
                    audio_src = audio_node[0].get(u"source", u"")
                    # NOTE(review): .text is None for an <audio> node without
                    # text -- confirm Segment.audio_href accepts None.
                    audio_href = audio_node[0].text

                seg = Segment(content=content,
                              iri_id=content.iri_id,
                              ensemble_id=ensembleId,
                              cutting_id=decoupId,
                              element_id=elementId,
                              tags=tags,
                              title=title,
                              abstract=abstract,
                              duration=duration,
                              author=author,
                              start_ts=start_ts,
                              date=date_str,
                              project_obj=project,
                              project_id=ldt_id,
                              audio_src=audio_src,
                              audio_href=audio_href)
                seg.polemics = seg.get_polemic(polemics)
                seg.save()


class ContentIndexer(LdtIndexer):
    """Indexer rebuilding segments from the iri file of each given content."""

    def __init__(self, contentList, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
        """Keep the contents to index and the optional progress callback.

        contentList -- iterable of contents handed to ``index_content``.
        decoupage_blackList -- forwarded to ``LdtIndexer.__init__``.
        callback -- optional callable invoked as ``callback(index, content)``
            before each content is indexed.
        """
        super(ContentIndexer, self).__init__(decoupage_blackList)
        self.__callback = callback
        self.__contentList = contentList

    def index_all(self):
        """Index every content of the list, notifying the callback first."""
        for position, current in enumerate(self.__contentList):
            if self.__callback:
                self.__callback(position, current)
            self.index_content(current)

    def index_content(self, content):
        """Replace the segments of ``content`` with those found in its iri file.

        The file located at ``content.iri_url()`` is fetched through
        ``request_with_auth`` and parsed; the segments sharing the content's
        ``iri_id`` are then deleted and every ``/iri/body/ensembles/ensemble``
        node is indexed.
        """
        _, raw_xml = request_with_auth(content.iri_url())
        tree = lxml.etree.parse(StringIO(raw_xml)) #@UndefinedVariable

        # Existing segments are only dropped once the file has been fetched
        # and parsed successfully.
        stale_segments = Segment.objects.filter(iri_id=content.iri_id) #@UndefinedVariable
        stale_segments.delete()

        for ensemble_node in tree.xpath("/iri/body/ensembles/ensemble"):
            self.index_ensemble(ensemble_node, content)
                    
            
class ProjectIndexer(LdtIndexer):
    """Indexer rebuilding segments from the ldt XML stored on each given project."""

    def __init__(self, projectList, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
        """Keep the projects to index and the optional progress callback.

        projectList -- iterable of projects handed to ``index_project``.
        decoupage_blackList -- forwarded to ``LdtIndexer.__init__``.
        callback -- optional callable invoked as ``callback(index, project)``
            before each project is indexed.
        """
        super(ProjectIndexer, self).__init__(decoupage_blackList)
        self.__projectList = projectList
        self.__callback = callback

    def index_all(self):
        """Index every project of the list, notifying the callback first."""
        for i, project in enumerate(self.__projectList):
            if self.__callback:
                self.__callback(i, project)

            self.index_project(project)

    def index_project(self, project):
        """Replace the segments of ``project`` with those found in its ldt XML.

        ``project.ldt_encoded`` is parsed, the segments attached to the
        project's ``ldt_id`` are deleted, then the children of every
        ``/iri/annotations/content`` node are indexed against the ``Content``
        whose ``iri_id`` equals the node id. Nodes referencing a content that
        does not exist are logged and skipped.
        """
        doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable

        Segment.objects.filter(project_obj__ldt_id=project.ldt_id).delete() #@UndefinedVariable

        for content in doc.xpath("/iri/annotations/content"):
            contentId = content.get(u"id", None)

            clist = Content.objects.filter(iri_id=contentId) #@UndefinedVariable
            if len(clist) == 0:
                # index_ensemble reads content.iri_id for every element, so a
                # missing Content would abort the whole run with an
                # AttributeError after the old segments were already deleted.
                logger.warning("Project %s references unknown content %s: its segments are not indexed",
                               project.ldt_id, contentId)
                continue
            content_obj = clist[0]

            # Every child is handed over as an ensemble, whatever its tag.
            for ensemble in content:
                self.index_ensemble(ensemble, content_obj, project)

@receiver(post_save, sender=Project)
def index_project(sender, **kwargs):
    """Refresh the segments and stats of a project right after it is saved.

    Does nothing unless ``settings.AUTO_INDEX_AFTER_SAVE`` is truthy. A
    project whose ``state`` is 2 is re-indexed; for any other state its
    segments are deleted. The project stats are updated in both cases.
    """
    if not settings.AUTO_INDEX_AFTER_SAVE:
        return

    saved_project = kwargs['instance']
    # NOTE(review): 2 is presumably the "published" state -- confirm against
    # the Project model.
    if saved_project.state == 2:
        ProjectIndexer([saved_project]).index_all()
    else:
        Segment.objects.filter(project_obj__ldt_id=saved_project.ldt_id).delete() #@UndefinedVariable
    update_stat_project(saved_project)