src/spel/views.py
author cavaliet
Wed, 01 Oct 2014 13:14:23 +0200
changeset 137 02e81a3fc99b
parent 106 ef4095059d92
child 142 34ad53eecb18
permissions -rwxr-xr-x
update venv to allow turtle and rdf export thanks to libadvene

'''
Created on 2014-02-21

@author: tcavalie
'''

from django.conf import settings
from django.contrib.contenttypes.models import ContentType
from django.db.models import Q
from django.db.models.aggregates import Count
from django.db.models.query import RawQuerySet
from django.http.response import HttpResponse
from django.shortcuts import get_object_or_404
from django.views.generic import TemplateView, View
from itertools import groupby
from ldt.indexation import get_results_with_context
from ldt.ldt_utils.models import Segment, Content, Project
from ldt.ldt_utils.projectserializer import ProjectJsonSerializer
from ldt.ldt_utils.views.json import project_json_id
from ldt.ldt_utils.views.workspace import get_search_results
from operator import itemgetter
from taggit.models import Tag, TaggedItem
import json
import re
import time
import uuid


import logging
logger = logging.getLogger(__name__)



class Home(TemplateView):
    """Landing page of the site; the template is rendered without any data."""

    template_name = "spel_home.html"

    def get(self, request):
        """Render ``spel_home.html`` with an empty context."""
        return self.render_to_response({})



class Theatre(TemplateView):
    """Theatre page, rendered with the date bounds of the theatre contents."""

    template_name = "spel_theatre.html"

    def get(self, request):
        """Render the page with the earliest and latest content creation dates.

        The context holds ``start_date`` and ``end_date``, taken from the
        contents tagged ``content_theatre``. Both are ``None`` when no such
        content exists, instead of failing with IndexError / DoesNotExist.
        """
        theatre_contents = Content.objects.filter(tags__name__in=["content_theatre"])
        try:
            # Get start and end for date bounds (earliest is available only on django 1.6)
            start_date = theatre_contents.order_by("content_creation_date")[0].content_creation_date
            end_date = theatre_contents.latest("content_creation_date").content_creation_date
        except (IndexError, Content.DoesNotExist):
            # No theatre content at all: there are no bounds to display.
            start_date = None
            end_date = None
        context = {"start_date": start_date, "end_date": end_date}
        return self.render_to_response(context)



class Opera(TemplateView):
    """Opera page, rendered with the date bounds of the opera contents."""

    template_name = "spel_opera.html"

    def get(self, request):
        """Render the page with the earliest and latest content creation dates.

        The context holds ``start_date`` and ``end_date``, taken from the
        contents tagged ``content_opera``. Both are ``None`` when no such
        content exists, instead of failing with IndexError / DoesNotExist.
        """
        opera_contents = Content.objects.filter(tags__name__in=["content_opera"])
        try:
            # Get start and end for date bounds (earliest is available only on django 1.6)
            start_date = opera_contents.order_by("content_creation_date")[0].content_creation_date
            end_date = opera_contents.latest("content_creation_date").content_creation_date
        except (IndexError, Content.DoesNotExist):
            # No opera content at all: there are no bounds to display.
            start_date = None
            end_date = None
        context = {"start_date": start_date, "end_date": end_date}
        return self.render_to_response(context)



class ChapterRequest(TemplateView):
    """Partial view rendering a list of theatre segments ("chapters").

    GET filters the segments of the ``content_theatre`` contents by content
    date, cutting id and tags. POST returns the ``performance`` /
    ``discussion`` segments that contain the posted annotation timecodes.
    """

    template_name = "partial/spel_chapters.html"

    @staticmethod
    def _split_param(raw_value):
        """Split a comma separated request value; an empty value gives []."""
        if raw_value == "":
            return []
        return raw_value.split(",")

    @staticmethod
    def _segment_ids_with_all_tags(tags):
        """Return the ids of the segments tagged with ALL the given tags.

        seg_queryset.filter(tags__in=tags) doesn't work because taggit finds
        segments with one of the tags and not ALL tags, so the tagged items
        are counted per segment and only the full matches are kept.
        """
        return list(TaggedItem.objects
                    .values_list("object_id", flat=True)
                    .filter(content_type=ContentType.objects.get_for_model(Segment))
                    .filter(tag__in=tags)
                    .annotate(count_status=Count('object_id'))
                    .filter(count_status=len(tags)))

    def get(self, request):
        """Render the segments matching the GET filters.

        Read parameters: ``start_date``, ``end_date``, ``annotation_types``,
        ``modalites_sceniques``, ``personnages`` (comma separated lists) and
        ``start_text`` / ``end_text`` (bounds of a ``ref_text`` range, with
        "start" / "end" meaning an open bound).
        """
        # Filter content by date if necessary
        content_qs = Content.objects.filter(tags__name__in=["content_theatre"])
        start_date_param = request.GET.get("start_date", "")
        if start_date_param != "":
            content_qs = content_qs.filter(content_creation_date__gt=start_date_param)
        end_date_param = request.GET.get("end_date", "")
        if end_date_param != "":
            content_qs = content_qs.filter(content_creation_date__lt=end_date_param + " 23:59:59")
        iri_ids = content_qs.values_list("iri_id", flat=True)

        # Filter segment if necessary
        seg_queryset = Segment.objects.filter(iri_id__in=iri_ids).select_related("content__title")
        annot_types = self._split_param(request.GET.get("annotation_types", ""))
        if annot_types:
            seg_queryset = seg_queryset.filter(cutting_id__in=annot_types)

        # First we look at modalites_sceniques and personnages tags.
        mod_scen = self._split_param(request.GET.get("modalites_sceniques", ""))
        perso = self._split_param(request.GET.get("personnages", ""))

        # Tags from start text and end text
        start_text = request.GET.get("start_text", "")
        end_text = request.GET.get("end_text", "")
        ref_text = None
        searched_ref_text = None
        if (start_text != "" and start_text != "start") or (end_text != "" and end_text != "end"):
            # First get all ref_text values, as (int, int) pairs so that they
            # sort numerically. The tag names look like "ref_text: 12-34".
            ref_text_int = []
            for rt in Tag.objects.filter(name__startswith="ref_text:"):
                m = re.match(r"^(\d+)-(\d+)$", rt.name[10:])
                if m:
                    ref_text_int.append((int(m.group(1)), int(m.group(2))))
            ref_text_int.sort()
            all_ref_text = [(str(one) + "-" + str(two)) for (one, two) in ref_text_int]

            # At least start or end have been defined, so we search for the concerned chapter ids.
            # An unknown bound is ignored and the range stays open on that side.
            start_index = 0
            if start_text != "start":
                try:
                    start_index = all_ref_text.index(start_text)
                except ValueError:
                    pass
            end_index = len(all_ref_text)
            if end_text != "end":
                try:
                    end_index = all_ref_text.index(end_text) + 1
                except ValueError:
                    pass
            searched_ref_text = all_ref_text[start_index:end_index]
            ref_text = Tag.objects.filter(name__in=[("ref_text: " + rt) for rt in searched_ref_text])

        # Get tags from orm
        tags = list(Tag.objects.filter(name__in=mod_scen + perso))

        # Ref text filter if possible
        if ref_text:
            # a chapter can only have one ref_text, and the search on ref_text is a OR.
            # That's many requests (with the orm) but it seems the only thing possible with tagging
            s = []
            for rt in ref_text:
                seg_ids = self._segment_ids_with_all_tags(tags + [rt])
                s += list(seg_queryset.filter(pk__in=seg_ids))
        else:
            # Get segments from tagged items.
            # NOTE(review): without any tag this yields no segment at all,
            # whereas OperaRequest returns every segment in that case - confirm intended.
            seg_ids = self._segment_ids_with_all_tags(tags)
            s = list(seg_queryset.filter(pk__in=seg_ids))

        context = {"annot_types": annot_types, "start_date": start_date_param, "end_date": end_date_param,
                   "mod_scen": mod_scen, "perso": perso, "searched_ref_text": searched_ref_text, "segments": s}

        return self.render_to_response(context)

    def post(self, request):
        """Render the chapters containing the posted annotation timecodes.

        The ``annotations`` POST value is a JSON list of objects with an
        ``iri_id`` and a ``data`` timecode. An empty list gives an empty
        response.
        """
        annotations_param = request.POST.get("annotations", "[]")
        grouped_annotations = []
        for iri_id, items in groupby(json.loads(annotations_param), itemgetter('iri_id')):
            # Get segments timecodes
            grouped_annotations.append({'content': iri_id, 'list': [int(i["data"]) for i in items]})
        if not grouped_annotations:
            return HttpResponse("")

        # Request segment : we make a complex query impossible (?) to do with orm, even with Q, Sum and other stuffs.
        # Here is a SQL example of what we want :
        # select ldt_utils_segment.id, ldt_utils_segment.cutting_id, ldt_utils_segment.start_ts, ldt_utils_segment.duration, ldt_utils_content.title AS ct
        # from ldt_utils_segment
        # INNER JOIN ldt_utils_content ON (ldt_utils_segment.content_id = ldt_utils_content.id)
        # where cutting_id IN ('performance','discussion')
        # AND (
        #   ( ldt_utils_segment.iri_id='CONTENT_ID_1' AND (
        #     ( ldt_utils_segment.start_ts < TIMECODE_1 AND TIMECODE_1 < (ldt_utils_segment.start_ts + ldt_utils_segment.duration) )
        #      OR
        #     ( ldt_utils_segment.start_ts < TIMECODE_2 AND TIMECODE_2 < (ldt_utils_segment.start_ts + ldt_utils_segment.duration) )
        #   ))
        #   OR
        #   ( ldt_utils_segment.iri_id='CONTENT_ID_2' AND (
        #     ( ldt_utils_segment.start_ts < TIMECODE_3 AND TIMECODE_3 < (ldt_utils_segment.start_ts + ldt_utils_segment.duration) )
        #   ))
        # )
        #
        # The content ids and timecodes come from the client, so they are
        # passed as query parameters (%s) and never concatenated into the SQL.
        raw_query = ("select ldt_utils_segment.id, ldt_utils_segment.cutting_id, ldt_utils_segment.start_ts, ldt_utils_segment.duration, ldt_utils_content.title AS ct "
                     "\nfrom ldt_utils_segment "
                     "\nINNER JOIN ldt_utils_content ON (ldt_utils_segment.content_id = ldt_utils_content.id) "
                     "\nwhere cutting_id IN ('performance','discussion') "
                     "\nAND (")
        params = []
        content_clauses = []
        for ga in grouped_annotations:
            # Parameters are appended in the order their placeholders appear.
            params.append(ga["content"])
            timecode_clauses = []
            for tc in ga["list"]:
                timecode_clauses.append("\n    ( ldt_utils_segment.start_ts < %s AND %s < (ldt_utils_segment.start_ts + ldt_utils_segment.duration) )")
                params.extend([tc, tc])
            content_clauses.append("\n  ( ldt_utils_segment.iri_id=%s AND ("
                                   + "\n     OR ".join(timecode_clauses)
                                   + "\n  )) ")
        raw_query += "\n  OR ".join(content_clauses) + "\n) "

        context = {"annot_chapters": True, "start_date": "", "end_date": "", "annot_types": [], "mod_scen": [], "perso": [],
                   "segments": list(Segment.objects.raw(raw_query, params))}
        return self.render_to_response(context)



class AnnotationRequest(TemplateView):
    """Partial view rendering annotations (segments) grouped by content.

    GET searches annotations either by text (``q``) or by ``type_inter``
    tags. POST returns the annotations starting inside the posted chapters.
    """

    template_name = "partial/spel_annotations.html"

    @staticmethod
    def _merge_tag_rows(rows, id_key):
        """Collapse one-dict-per-(segment, tag) rows into one dict per segment.

        Each returned dict is the first row seen for the segment, with a
        ``tags`` list gathering every non null ``tags__name`` of its rows.
        Segments are returned in order of first appearance.
        """
        merged = {}
        ordered = []
        for row in rows:
            seg = merged.get(row[id_key])
            if seg is None:
                seg = row
                seg["tags"] = []
                merged[row[id_key]] = seg
                ordered.append(seg)
            # An outer join gives a null tag name for an untagged segment.
            if row['tags__name'] is not None:
                seg["tags"].append(row['tags__name'])
        return ordered

    @staticmethod
    def _group_by_content(segments):
        """Group segment dicts by content, as {'content': ..., 'list': [...]} dicts.

        Segments whose ``iri_id`` matches no Content are dropped.
        """
        # Prefetch all contents (the first one found wins for a given iri_id)
        contents = {}
        for content in Content.objects.filter(iri_id__in=[s['iri_id'] for s in segments]):
            contents.setdefault(content.iri_id, content)
        results = []
        # groupby only merges adjacent items: sort first (the sort is stable)
        # so that every content appears exactly once.
        by_content = sorted(segments, key=itemgetter('iri_id'))
        for iri_id, items in groupby(by_content, itemgetter('iri_id')):
            content = contents.get(iri_id)
            if content is None:
                continue
            results.append({'content': content, 'list': list(items)})
        return results

    def get(self, request):
        """Render the annotations matching ``q`` or, failing that, ``type_inter``.

        With a non empty ``q`` the search is delegated to
        ``get_search_results`` on the ``abstract`` field; otherwise the
        segments carrying all the requested ``type_inter`` tags are returned.
        """
        q = request.GET.get("q", "")
        page = 1
        content_list = Content.objects.filter(tags__name__in=["content_theatre"])
        if q != "":
            field = "abstract"
            results, nb_contents, nb_segments = get_search_results(request, q, field, page, content_list)
            results = results.object_list
            type_inter_param = ""
        else:
            type_inter_param = request.GET.get("type_inter", "")
            results = []
            tagged_segs = []
            if type_inter_param != "":
                type_inter = [("type_inter: " + t) for t in type_inter_param.split(",")]
                tags = list(Tag.objects.filter(name__in=type_inter))
                # Get segments from tagged items: count the matching tags per
                # segment and keep the segments that carry ALL of them.
                seg_ids = list(TaggedItem.objects
                               .values_list("object_id", flat=True)
                               .filter(content_type=ContentType.objects.get_for_model(Segment))
                               .filter(tag__in=tags)
                               .annotate(count_status=Count('object_id'))
                               .filter(count_status=len(tags)))
                rows = Segment.objects.filter(pk__in=seg_ids).prefetch_related("tags__name").values("pk", "tags__name", "project_id", "iri_id", "ensemble_id", "cutting_id", "element_id", "title", "duration", "start_ts", "author", "date", "abstract", "polemics", "id_hash", "audio_src", "audio_href")

                # Because of prefetch and values, we have to parse all items in order to create a list of tags for all items
                tagged_segs = self._merge_tag_rows(rows, 'pk')
                results = self._group_by_content(tagged_segs)
            nb_contents = len(results)
            nb_segments = len(tagged_segs)

        context = {"q": q, "searched_tags": type_inter_param.split(","), "results": results, "nb_contents": nb_contents, "nb_annotations": nb_segments}

        return self.render_to_response(context)

    def post(self, request):
        """Render the annotations that start inside the posted chapters.

        The ``chapters`` POST value is a JSON list of objects with an
        ``iri_id`` and ``start`` / ``end`` timecodes. An empty list gives an
        empty response.
        """
        chapters_param = request.POST.get("chapters", "[]")
        grouped_chapters = []
        for iri_id, items in groupby(json.loads(chapters_param), itemgetter('iri_id')):
            # Get segments timecodes
            s = [{"start": int(i["start"]), "end": int(i["end"])} for i in items]
            grouped_chapters.append({'content': iri_id, 'list': s})
        if not grouped_chapters:
            return HttpResponse("")

        # Request segment : we make a complex query impossible (?) to do with orm, even with Q, Sum and other stuffs.
        # Here is a SQL example of what we want :
        # select *
        # from ldt_utils_segment
        # where cutting_id NOT IN ('performance','discussion')
        # AND (
        #   ( ldt_utils_segment.iri_id='CONTENT_ID_1' AND (
        #     ( TIMECODE_START_1 < ldt_utils_segment.start_ts AND ldt_utils_segment.start_ts < TIMECODE_END_1 )
        #      OR
        #     ( TIMECODE_START_2 < ldt_utils_segment.start_ts AND ldt_utils_segment.start_ts < TIMECODE_END_2 )
        #   ))
        #   OR
        #   ( ldt_utils_segment.iri_id='CONTENT_ID_2' AND (
        #     ( TIMECODE_START_3 < ldt_utils_segment.start_ts AND ldt_utils_segment.start_ts < TIMECODE_END_3 )
        #   ))
        # )
        #
        # The content ids and timecodes come from the client, so they are
        # passed as query parameters (%s) and never concatenated into the SQL.
        # The tagged items are restricted to the Segment content type:
        # object_id alone would also match the tags of any other tagged model
        # (contents are tagged too) that happens to share the same id.
        raw_query = ("SELECT ldt_utils_segment.id, taggit_tag.name AS tags__name, ldt_utils_segment.iri_id, ldt_utils_segment.cutting_id, ldt_utils_segment.element_id, ldt_utils_segment.title, ldt_utils_segment.duration, ldt_utils_segment.start_ts, ldt_utils_segment.abstract "
                     "\nFROM ldt_utils_segment "
                     "\nLEFT OUTER JOIN taggit_taggeditem "
                     "\nON (ldt_utils_segment.id = taggit_taggeditem.object_id AND taggit_taggeditem.content_type_id = %s) "
                     "\nLEFT OUTER JOIN taggit_tag "
                     "\nON (taggit_taggeditem.tag_id = taggit_tag.id) "
                     "\nwhere cutting_id NOT IN ('performance','discussion') "
                     "\nAND (")
        params = [ContentType.objects.get_for_model(Segment).pk]
        content_clauses = []
        for ga in grouped_chapters:
            # Parameters are appended in the order their placeholders appear.
            params.append(ga["content"])
            timecode_clauses = []
            for tc in ga["list"]:
                timecode_clauses.append("\n    ( %s < ldt_utils_segment.start_ts AND ldt_utils_segment.start_ts < %s )")
                params.extend([tc["start"], tc["end"]])
            content_clauses.append("\n  ( ldt_utils_segment.iri_id=%s AND ("
                                   + "\n     OR ".join(timecode_clauses)
                                   + "\n  )) ")
        raw_query += "\n  OR ".join(content_clauses) + "\n)"

        # Dict because of itemgetter for groupby
        rows = [dict(s.__dict__) for s in Segment.objects.raw(raw_query, params)]
        # Because of taggit_tag.name JOIN, we have to parse all items in order to create a list of tags for all items
        tagged_segs = self._merge_tag_rows(rows, 'id')

        results = self._group_by_content(tagged_segs)
        nb_contents = len(results)
        nb_segments = len(tagged_segs)

        context = {"chapter_annots": True, "q": "", "searched_tags": [], "results": results, "nb_contents": nb_contents, "nb_annotations": nb_segments}

        return self.render_to_response(context)


class OperaRequest(TemplateView):
    """Partial view rendering the opera segments matching the GET filters.

    The segments of the ``content_opera`` contents are filtered by content
    date, text query, cutting id, tags and ``opera_mesure`` range.
    """

    template_name = "partial/spel_opera_annotations.html"

    @staticmethod
    def _split_param(raw_value):
        """Split a comma separated request value; an empty value gives []."""
        if raw_value == "":
            return []
        return raw_value.split(",")

    @staticmethod
    def _segment_ids_with_all_tags(tags):
        """Return the ids of the segments tagged with ALL the given tags.

        seg_queryset.filter(tags__in=tags) doesn't work because taggit finds
        segments with one of the tags and not ALL tags, so the tagged items
        are counted per segment and only the full matches are kept.
        """
        return list(TaggedItem.objects
                    .values_list("object_id", flat=True)
                    .filter(content_type=ContentType.objects.get_for_model(Segment))
                    .filter(tag__in=tags)
                    .annotate(count_status=Count('object_id'))
                    .filter(count_status=len(tags)))

    def get(self, request):
        """Render the segments matching the GET filters.

        Read parameters: ``start_date``, ``end_date``, ``q``,
        ``annotation_types``, ``modalites_sceniques``, ``personnages``,
        ``type_travail``, ``acte``, ``scene`` (comma separated lists) and
        ``start_mesure`` / ``end_mesure``. A lone ``start_mesure`` selects a
        single mesure; with ``end_mesure`` the two values bound a range, where
        "start" / "end" mean an open bound. A missing ``start_mesure`` means
        no mesure filter, and an unknown single mesure gives no segment.
        """
        # Filter content by date if necessary
        content_qs = Content.objects.filter(tags__name__in=["content_opera"])
        start_date_param = request.GET.get("start_date", "")
        if start_date_param != "":
            content_qs = content_qs.filter(content_creation_date__gt=start_date_param)
        end_date_param = request.GET.get("end_date", "")
        if end_date_param != "":
            content_qs = content_qs.filter(content_creation_date__lt=end_date_param + " 23:59:59")

        # Query on abstract management
        q = request.GET.get("q", "").strip()
        if q != "":
            # Query case : we get segments by the search engine
            field = "abstract"
            results = get_results_with_context(Segment, field, q, content_qs)
            seg_queryset = Segment.objects.filter(pk__in=[e['indexation_id'] for e in results]).select_related("content__title")
        else:
            # Filter segment if necessary
            iri_ids = content_qs.values_list("iri_id", flat=True)
            seg_queryset = Segment.objects.filter(iri_id__in=iri_ids).select_related("content__title")

        annot_types = self._split_param(request.GET.get("annotation_types", ""))
        if annot_types:
            seg_queryset = seg_queryset.filter(cutting_id__in=annot_types)

        # Tags that a segment must ALL carry.
        mod_scen = self._split_param(request.GET.get("modalites_sceniques", ""))
        perso = self._split_param(request.GET.get("personnages", ""))
        type_travail = self._split_param(request.GET.get("type_travail", ""))
        acte = self._split_param(request.GET.get("acte", ""))
        scene = self._split_param(request.GET.get("scene", ""))

        # Mesure management
        start_mesure = request.GET.get("start_mesure", "")
        end_mesure = request.GET.get("end_mesure", "")
        mesure = []
        mesure_int = []
        unknown_mesure = False
        if end_mesure == "":
            if start_mesure not in ("", "start"):
                # Only one mesure has been defined. A filter is used rather
                # than get() so that an unknown mesure does not raise
                # Tag.DoesNotExist.
                mesure = list(Tag.objects.filter(name="opera_mesure: " + start_mesure))
                mesure_int = [start_mesure]
                unknown_mesure = not mesure
        else:
            # First get all opera_mesure values, as integers so that they
            # sort numerically. The tag names look like "opera_mesure: 12".
            for m_tag in Tag.objects.filter(name__startswith="opera_mesure:"):
                tested = re.match(r"^(\d+)$", m_tag.name[14:])
                if tested:
                    mesure_int.append(int(tested.group(1)))
            mesure_int.sort()

            # At least the end has been defined, so we search for the concerned mesures.
            # An unknown or non numeric bound is ignored and the range stays open on that side.
            start_index = 0
            if start_mesure != "start":
                try:
                    start_index = mesure_int.index(int(start_mesure))
                except ValueError:
                    pass
            end_index = len(mesure_int)
            if end_mesure != "end":
                try:
                    end_index = mesure_int.index(int(end_mesure)) + 1
                except ValueError:
                    pass
            mesure_int = mesure_int[start_index:end_index]
            mesure = Tag.objects.filter(name__in=[("opera_mesure: " + str(m)) for m in mesure_int])

        # Get tags from orm
        tags = list(Tag.objects.filter(name__in=mod_scen + perso + type_travail + acte + scene))

        if unknown_mesure:
            # No segment can carry a mesure tag that does not exist.
            s = []
        elif mesure:
            # a segment can only have one mesure, and the search on mesure is a OR.
            # That's many requests (with the orm) but it seems the only thing possible with tagging
            s = []
            for m_tag in mesure:
                seg_ids = self._segment_ids_with_all_tags(tags + [m_tag])
                s += list(seg_queryset.filter(pk__in=seg_ids))
        elif tags:
            # Get segments from tagged items
            seg_ids = self._segment_ids_with_all_tags(tags)
            s = list(seg_queryset.filter(pk__in=seg_ids))
        else:
            # No tag filter at all: every segment of the queryset matches.
            s = list(seg_queryset)

        context = {"annot_types": annot_types, "start_date": start_date_param, "end_date": end_date_param,
                   "mod_scen": mod_scen, "perso": perso, "type_travail": type_travail, "acte": acte,
                   "scene": scene, "mesure": mesure_int, "q": q, "segments": s}

        return self.render_to_response(context)




class JsonRdfExport(View):
    """Export a project as JSON, or as RDF / Turtle thanks to libadvene."""

    def get(self, request, id):
        """Return the project identified by the ldt id ``id`` in the asked format.

        With ``?format=rdf`` or ``?format=ttl`` the project is serialized to
        cinelab, loaded into a libadvene package and written out by the
        matching libadvene serializer; the result is returned as text/plain.
        Any other format is delegated to ``project_json_id``.

        Responds with a 404 when an rdf/ttl export is asked for an unknown
        project.
        """
        export_format = request.GET.get("format", "")
        if export_format not in ("rdf", "ttl"):
            return project_json_id(request, id)

        # Function-scope imports: libadvene is only needed for these exports.
        import os
        import shutil
        import tempfile
        from libadvene.model.cam.package import Package
        from libadvene.model.parsers.cinelab_json import Parser
        from libadvene.model.serializers.cinelab_rdf import serialize_to as serialize_to_rdf
        from libadvene.model.serializers.cinelab_ttl import serialize_to as serialize_to_ttl

        project = get_object_or_404(Project, ldt_id=id)
        project_dict = ProjectJsonSerializer(project).serialize_to_cinelab()
        p = Package("http://spectacleenligne.fr", create=True)
        Parser.parse_into(project_dict, p)

        # The serializers are handed a file path, so the export goes through a
        # file. It lives in a private temporary directory that is always
        # removed, so that no file is left behind after the request.
        tmp_dir = tempfile.mkdtemp()
        try:
            outfile = os.path.join(tmp_dir, "export")
            if export_format == "rdf":
                serialize_to_rdf(p, outfile)
            else:
                serialize_to_ttl(p, outfile)
            with open(outfile, "r") as myfile:
                filedata = myfile.read()
        finally:
            shutil.rmtree(tmp_dir, ignore_errors=True)
        return HttpResponse(filedata, content_type="text/plain")