src/ldt/ldt/ldt_utils/stat.py
author verrierj
Wed, 18 Jan 2012 15:30:35 +0100
changeset 412 8d777b1d1d92
parent 411 d126a67897c0
child 415 4236f99104ba
permissions -rw-r--r--
Add API to search annotations inside a content between two timecodes

from django.conf import settings
from ldt.ldt_utils.models import AnnotationStat, Project, Segment
from django.db.models.signals import pre_delete
import lxml.etree
import datetime


def update_stat_project(project, contents=None, doc=None, update_contents=None):
    """Recompute the annotation statistics a project contributes to its contents.

    For every ``/iri/annotations/content`` node of the project XML whose ``id``
    matches the ``iri_id`` of one of ``contents``, the annotations found under
    ``ensemble/decoupage/elements/element`` are counted and spread over
    ``settings.DIVISIONS_FOR_STAT_ANNOTATION`` buckets.  The matching
    ``AnnotationStat`` row is saved, and the content totals refreshed through
    ``update_stat_content``, only when the figures actually changed.

    project         -- project whose statistics are recomputed
    contents        -- contents to consider; when empty or omitted,
                       ``project.contents.all()`` is used
    doc             -- parsed XML tree of the project; when omitted,
                       ``project.ldt`` is parsed
    update_contents -- extra contents whose totals are refreshed
                       unconditionally at the end

    An ``AnnotationStat`` row is expected to exist for every matched content;
    the queryset ``get()`` fails otherwise.
    """
    if not contents:
        contents = project.contents.all()

    # An lxml element without children evaluates to False, so the absence of
    # a document has to be tested against None explicitly.
    if doc is None:
        doc = lxml.etree.fromstring(project.ldt)

    number_division = settings.DIVISIONS_FOR_STAT_ANNOTATION
    # Kept as a queryset (not a list) because the lookup below uses ``get()``.
    contributions = AnnotationStat.objects.filter(project=project)

    for content_node in doc.xpath('/iri/annotations/content'):
        content_name = content_node.get('id')
        matching_contents = filter_list(contents, 'iri_id', content_name)

        # Skip contents referenced in the XML that do not belong to the
        # contents of the project.
        if not matching_contents:
            continue

        content = matching_contents[0]
        contribution_to_content = contributions.get(content=content)

        size_division = content.duration / number_division
        limits = [x * size_division for x in range(number_division + 1)]
        buckets = [0] * number_division
        nb_annotation = 0

        for ann in content_node.xpath('ensemble/decoupage/elements/element'):
            begin = int(ann.get('begin'))
            end = int(ann.get('dur')) + begin

            buckets = find_buckets(buckets, limits, begin, end)
            nb_annotation += 1

        buckets = get_string_from_buckets(buckets)

        # Only hit the database when something changed.
        if (contribution_to_content.stat != buckets
                or contribution_to_content.nb_annotation != nb_annotation):
            contribution_to_content.stat = buckets
            contribution_to_content.nb_annotation = nb_annotation
            contribution_to_content.save()
            update_stat_content(content)

    for content in update_contents or ():
        update_stat_content(content)
        
        
def get_string_from_buckets(buckets):
    """Serialize a sequence of bucket counters as a comma separated string.

    ``[1, 0, 2]`` becomes ``"1,0,2"`` and an empty sequence becomes ``""``.
    Any iterable is accepted (list, tuple, generator), not only lists.
    """
    return ",".join(str(count) for count in buckets)

def get_buckets_from_string(string):
    """Parse a comma separated string of counters into a list of integers.

    An empty string or ``None`` yields an empty list, so that the result of
    serializing an empty bucket list can be read back without error.
    Raises ValueError when a non-empty item is not an integer.
    """
    if not string:
        return []
    return [int(x) for x in string.split(',')]


def find_buckets(buckets, limits, begin, end):
    """Increment every bucket overlapped by the interval [begin, end].

    buckets -- list of counters, modified in place and returned
    limits  -- ascending bucket boundaries; bucket ``i`` spans
               ``limits[i]`` to ``limits[i+1]``, so there must be exactly
               one more limit than there are buckets
    begin   -- start of the annotation
    end     -- end of the annotation

    The first bucket counted is the one containing ``begin``.  Buckets are
    half-open (lower bound included, upper bound excluded) so that an
    annotation starting exactly on a boundary is counted once, in the bucket
    that starts there, and not in the preceding one as well.  The last bucket
    also includes its upper bound.  Following buckets are counted as long as
    their lower bound does not exceed ``end``.  Nothing is counted when
    ``begin`` lies outside the limits.

    Raises ValueError when the number of limits does not match the buckets.
    """
    if len(buckets) + 1 != len(limits):
        raise ValueError("There should be as many buckets as those defined by limits.")

    last_index = len(buckets) - 1
    has_started = False

    for i in range(len(buckets)):
        lower = limits[i]
        upper = limits[i + 1]

        if not has_started:
            in_bucket = begin < upper or (i == last_index and begin == upper)
            if lower <= begin and in_bucket:
                buckets[i] += 1
                has_started = True
        elif lower > end:
            # The annotation ends before this bucket starts.
            break
        else:
            buckets[i] += 1

    return buckets
        
        
def delete_stat_project(sender, instance, **kwargs):
    """Remove a project's contribution from the statistics of its contents.

    Connected below to the ``pre_delete`` signal of ``Project``.  For every
    ``AnnotationStat`` row of the project whose content belongs to
    ``instance.contents``, the annotation count and the per-bucket counters
    of that row are subtracted from the content totals, and the content is
    saved.

    sender   -- model class sending the signal
    instance -- the project about to be deleted
    """
    # Index the contents by id once; setdefault keeps the first occurrence.
    contents_by_id = {}
    for content in instance.contents.all():
        contents_by_id.setdefault(content.id, content)

    for c in AnnotationStat.objects.filter(project=instance):
        content = contents_by_id.get(c.content.id)
        if content is None:
            continue

        if c.nb_annotation:
            content.nb_annotation -= c.nb_annotation

        # Without stored totals on the content there is nothing to subtract
        # from; parsing an empty value would abort the project deletion.
        if c.stat and content.stat_annotation:
            content_stat = get_buckets_from_string(content.stat_annotation)
            proj_stat = get_buckets_from_string(c.stat)
            content.stat_annotation = get_string_from_buckets(
                [pair[0] - pair[1] for pair in zip(content_stat, proj_stat)])

        content.save()

pre_delete.connect(delete_stat_project, sender=Project)
        
            
def update_stat_content(content):
    """Rebuild the annotation totals of a content from all its contributions.

    Sums, over every ``AnnotationStat`` row attached to ``content``, the
    per-bucket counters and the annotation counts, stores the result in
    ``content.stat_annotation`` and ``content.nb_annotation``, sets
    ``content.last_annotated`` to the current time and saves the content.
    """
    content_stat = [0] * settings.DIVISIONS_FOR_STAT_ANNOTATION
    nb_annotation = 0

    for contribution in AnnotationStat.objects.filter(content=content):
        if contribution.stat:
            project_stat = get_buckets_from_string(contribution.stat)
            content_stat = [sum(pair) for pair in zip(content_stat, project_stat)]
        # The count may be unset on a contribution; treat it as zero.
        nb_annotation += contribution.nb_annotation or 0

    content.nb_annotation = nb_annotation
    content.stat_annotation = get_string_from_buckets(content_stat)
    content.last_annotated = datetime.datetime.now()
    content.save()

import uuid
def add_annotation_to_stat(content, project, begin, end):
    """Account for one new annotation in the project and content statistics.

    content -- content receiving the annotation
    project -- project the annotation belongs to
    begin   -- start of the annotation
    end     -- end of the annotation

    The annotation counts of both the content and the ``AnnotationStat`` row
    linking ``project`` and ``content`` are incremented.  When that row
    already holds bucket counters, the buckets covered by [begin, end] are
    added to the counters of both the row and the content.  Both objects are
    saved and ``content.last_annotated`` is set to the current time.
    """
    contribution_project = AnnotationStat.objects.get(project=project, content=content)

    # Either count may be unset; treat an unset count as zero.
    contribution_project.nb_annotation = (contribution_project.nb_annotation or 0) + 1
    content.nb_annotation = (content.nb_annotation or 0) + 1

    if contribution_project.stat:
        number_division = settings.DIVISIONS_FOR_STAT_ANNOTATION
        size_division = content.duration / number_division

        limits = [x * size_division for x in range(number_division + 1)]
        buckets = find_buckets([0] * number_division, limits, begin, end)

        content_stat = get_buckets_from_string(content.stat_annotation)
        content.stat_annotation = get_string_from_buckets(
            [pair[0] + pair[1] for pair in zip(content_stat, buckets)])

        # Accumulate onto the existing project counters rather than
        # replacing them with the buckets of this single annotation.
        project_stat = get_buckets_from_string(contribution_project.stat)
        contribution_project.stat = get_string_from_buckets(
            [pair[0] + pair[1] for pair in zip(project_stat, buckets)])

    content.last_annotated = datetime.datetime.now()
    content.save()
    contribution_project.save()
    

def filter_list(list, criteria, value):
    """Return the items of ``list`` whose attribute ``criteria`` equals ``value``.

    The order of the input is preserved and a new list is always returned.
    """
    return [item for item in list if getattr(item, criteria) == value]