spel: changeset 146:404a0085badf

--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/spel/management/commands/generate_demonstrateur_data.py	Mon Nov 03 17:05:50 2014 +0100
@@ -0,0 +1,355 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Mar 22, 2013
+
+@author: tc
+'''
+
+from datetime import datetime
+from django.conf import settings
+from django.contrib.auth import get_user_model
+from django.core.management.base import BaseCommand, CommandError
+from ldt.api.ldt.serializers.cinelabserializer import CinelabSerializer
+from ldt.ldt_utils.models import Media, Content, Project
+from ldt.ldt_utils.projectserializer import ProjectJsonSerializer
+from ldt.ldt_utils.stat import update_stat_content
+from ldt.security import set_current_user
+#from string import replace
+import copy
+import csv
+import json
+import lxml.etree
+#import os.path
+#import requests
+#import time
+
+import logging
+logger = logging.getLogger(__name__)
+
+User = get_user_model()
+
+class Command(BaseCommand):
+    '''
+    Create demonstrateur medias, contents and projects from the rows of a csv data file
+    '''
+
+    args = 'csv data file'
+    help = 'generates json files for demonstrateur contents and project'
+    
+    
+    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
+        val = dict_arg.get(key, default)
+        return conv(val) if val else default
+
+    def __safe_decode(self, s):
+        if not isinstance(s, basestring):
+            return s
+        try:
+            return s.decode('utf8')
+        except:
+            try:
+                return s.decode('latin1')
+            except:
+                return s.decode('utf8','replace')
+    
+    def apply_exceptions(self, a):
+        if a["id"]=="a17" and a["media"]=="travail-sur-lacte-1-jour-15_b822f8":
+            a["end"] = 2723000
+        elif a["id"]=="a86" and a["media"]=="travail-sur-lacte-3-et-lacte-1-jour-22_4c3a75":
+            a["begin"] = 20925000
+            a["end"] = 21290000
+        elif a["id"]=="a1" and a["media"]=="premier-bout-a-bout-de-la-piece-jour-23_73654f":
+            a["end"] = 728000
+        elif a["id"]=="a1" and a["media"]=="bout-a-bout-avec-les-enfants_a4c293_SON_SATURE":
+            a["end"] = 560000
+        elif a["id"]=="a1" and a["media"]=="filage-technique-jour-40-deuxieme-partie_58b2a6_SON_GRESILLE":    
+            a["end"] = 560000
+        elif a["id"]=="a105" and a["media"]=="travail-sur-lacte-2-jour-18_5e9fe9":
+            a["begin"] = 19628000
+        elif a["id"]=="a117" and a["media"]=="travail-sur-lacte-2-jour-18_5e9fe9":
+            a["end"] = 22080000
+        
+
+    def handle(self, *args, **options):
+        
+        # Test path
+        if len(args) != 1:
+            raise CommandError("The command has no argument or too much arguments. Only one is needed : the csv file.")
+        
+        with open(args[0], 'rU') as datafile:
+            reader = csv.reader(datafile)
+            lines = list(reader)
+            last_content_id = ""
+            last_content_title = ""
+            count_content = 0
+            
+            admin = User.objects.filter(is_superuser=True)[0]
+            set_current_user(admin)
+            
+            for line in lines:
+                # Test content id and main annotation id
+                content_id = line[1]
+                pre_annot_id = line[2]
+                pre_annot_tcin_hms = line[3]
+                pre_annot_tcin_ms = line[4]
+                pre_annot_tcout_hms = line[5]
+                pre_annot_tcout_ms = line[6]
+                annot_id = line[7]
+                annot_tcin_hms = line[10]
+                annot_tcin_ms = line[11]
+                annot_tcout_hms = line[12]
+                annot_tcout_ms = line[13]
+                post_annot_id = line[14]
+                post_annot_tcin_hms = line[15]
+                post_annot_tcin_ms = line[16]
+                post_annot_tcout_hms = line[17]
+                post_annot_tcout_ms = line[18]
+                created_ctt_id = line[19]
+                try:
+                    created_tcin_ms = int(line[21])
+                except:
+                    created_tcin_ms = False
+                try:
+                    created_ctt_dur = int(line[25])
+                except:
+                    created_ctt_dur = False
+                if content_id != "" and created_ctt_id != "" and created_tcin_ms and created_ctt_dur:
+                    if last_content_id != content_id:
+                        last_content_id = content_id
+                        count_content = 0
+                        print "get content iri_id = " + last_content_id
+                        ctt_source = Content.objects.filter(iri_id = last_content_id).select_related("front_project")[0]
+                        ps = ProjectJsonSerializer(ctt_source.front_project)
+                        print "project ldt_id = " + ctt_source.front_project.ldt_id
+                        project_source_data = ps.serialize_to_cinelab()
+                        
+                    
+                    if annot_id != "" and project_source_data:
+                        count_content += 1
+                        # Get tc from annotations datas
+                        tc_offset = annot_tcin_ms
+                        #tc_end = annot_tcout_ms
+                        if pre_annot_id != "":
+                            tc_offset = pre_annot_tcin_ms
+                        tc_offset = int(tc_offset)
+                        #if post_annot_id != "":
+                        #    tc_end = post_annot_tcout_ms
+                        
+                        # data to create new data
+                        ctt_title = content_id + (u" 0" if count_content<10 else u" ") + unicode(count_content)
+                        media_src = "/data/demonstrateur/"+content_id+"/" + created_ctt_id + ".mp4"
+                        if hasattr(settings, "DEMONSTRATEUR_DOMAIN") and len(settings.DEMONSTRATEUR_DOMAIN)>0:
+                            media_src = settings.DEMONSTRATEUR_DOMAIN + media_src
+                        media, _ = Media.objects.get_or_create(src=media_src, duration=created_ctt_dur)
+                        media.is_public = True
+                        content, _ = Content.objects.get_or_create(iri_id = created_ctt_id, 
+                                                                 iriurl = created_ctt_id+u"/"+created_ctt_id+u".iri", 
+                                                                 media_obj = media, 
+                                                                 title = ctt_title, 
+                                                                 duration = created_ctt_dur)
+                        content.is_public = True
+                        update_stat_content(content)
+                        # Get content front projet
+                        proj = content.front_project
+                        ps = ProjectJsonSerializer(proj)
+                        proj_data = ps.serialize_to_cinelab()
+                        proj_data["annotations"] = []
+                        for a in project_source_data["annotations"]:
+                            #print "get content 12"
+                            if a["id"] in [pre_annot_id, annot_id, post_annot_id]:
+                                self.apply_exceptions(a)
+                                temp_a = copy.deepcopy(a)
+                                b = temp_a["begin"]
+                                temp_a["begin"] = temp_a["begin"] - created_tcin_ms
+                                temp_a["end"] = temp_a["end"] - created_tcin_ms
+                                temp_a["media"] = created_ctt_id
+                                if temp_a["begin"] < 0:
+                                    print created_ctt_id + " : " + temp_a["id"] + " : < 0 : serveur : " + str(b) + ", excel : " + str(tc_offset) + ", created_tcin_ms : " + str(created_tcin_ms)
+                                if temp_a["end"] > created_ctt_dur:
+                                    print created_ctt_id + " : " + temp_a["id"] + " : > " + str(created_ctt_dur) + " : " + str(temp_a["end"])
+                                proj_data["annotations"].append(temp_a)
+                        # The annotation-types
+                        for at in project_source_data["annotation-types"]:
+                            #print "get content 15"
+                            proj_data["annotation-types"].append(at)
+                            proj_data["views"][0]["annotation_types"].append(at["id"])
+                            proj_data["lists"][0]["items"].append({ "id-ref": at["id"] })
+                        # The tags
+                        proj_data["tags"] = project_source_data["tags"]
+                        #print json.dumps(proj_data, indent=2)
+                        
+                        serializr = CinelabSerializer()
+                        serializr.validate_cinelab_json(proj_data)
+                        ldt_xml = serializr.cinelab_to_ldt(proj_data) 
+                        #print lxml.etree.tostring(ldt_xml, pretty_print=True)
+                        proj.ldt = lxml.etree.tostring(ldt_xml, pretty_print=True)
+                        #logger.debug(proj.ldt)
+                        proj.save()
+                        update_stat_content(content)
+                        
+                        print ctt_title + " : " + created_ctt_id
+                    
+                else:
+                    print "Nope"
+        
+        """
+        # Continue
+        if do_import:
+            print("Parsing folder %s ..." % path)
+            for dirpath, dirnames, filenames in os.walk(path):
+                #print("Parsing walk %s, %s, %s" % (dirpath, dirnames, filenames))
+                for filename in filenames:
+                    if filename == "cinelab_iri.json":
+                        ctt_id = os.path.basename(dirpath)
+                        # dirname is like "acte-1-stuff-thing_1234" and we want "Acte 1 stuff thing"
+                        content_title = ctt_id.split("_")[0].replace("-"," ").capitalize()
+                        json_path = os.path.join(dirpath, filename)
+                        print("Parsing json file %s ..." % json_path)
+                        json_data = False
+                        try:
+                            file_data = open(json_path)
+                            json_data = json.load(file_data)
+                            file_data.close()
+                        except:
+                            print("Error while parsing json file.")
+                        if json_data:
+                            dur = int(json_data["medias"][0]["meta"]["duration"])
+                            # Save media and content
+                            media, _ = Media.objects.get_or_create(src="/data/opera/"+ctt_id+"/720p.mp4", duration=dur)
+                            media.is_public = True
+                            content = Content.objects.create(iri_id=ctt_id, 
+                                                             iriurl=ctt_id+u"/"+ctt_id+u".iri", 
+                                                             media_obj=media, 
+                                                             title=content_title, 
+                                                             duration=dur,
+                                                             content_creation_date = json_data["meta"]["dc:created"])
+                            content.is_public = True
+                            content.tags.add("content_opera")
+                            # Get content front projet
+                            proj = content.front_project
+                            username = proj.owner.username
+                            now = datetime.utcnow().isoformat()
+                            # Start data to send to api
+                            proj_data = {}
+                            proj_data["meta"] = {}
+                            proj_data["meta"]["id"] = proj.ldt_id
+                            proj_data["meta"]["dc:title"] = proj.title
+                            proj_data["meta"]["dc:creator"] = username
+                            proj_data["meta"]["dc:description"] = "description added"
+                            proj_data["meta"]["dc:created"] = json_data["meta"]["dc:created"]
+                            proj_data["meta"]["dc:modified"] = json_data["meta"]["dc:modified"]
+                            proj_data["meta"]["dc:contributor"] = username
+                            proj_data["medias"] = []
+                            proj_data["medias"].append({"id": content.iri_id})
+                            # The tags and annotations (main part)
+                            proj_data["tags"] = []
+                            proj_data["annotations"] = []
+                            tags_id_label = {}
+                            tags_label_id = {}
+                            for a in json_data["annotations"]:
+                                # "content": {  "data": { "modalites_sceniques": "costumes,décors",... } }
+                                # Opera management :
+                                # tag "modalites_sceniques" becomes "opera_modalites_sceniques" 
+                                # tag "mesure" becomes "opera_mesure"
+                                # tag "personnages" becomes "opera_personnages"
+                                # tag "scene" becomes "opera_scene"
+                                # tag "type_travail" becomes "opera_type_travail" 
+                                # tag "acte" becomes "opera_acte"
+                                if "content" in a and "data" in a["content"] and type(a["content"]["data"]) == type(dict()):
+                                    annot_tags = []
+                                    desc = ""
+                                    title = ""
+                                    # Build tags
+                                    for k,v in a["content"]["data"].iteritems():
+                                        if k!="commentaire" and k!="description" and k!="titre":
+                                            if k=="acte" or k=="scene":
+                                                v = int(v)
+                                            k = "opera_" + k
+                                            v = unicode(v).split(",")
+                                            for val in v:
+                                                val = val.strip()
+                                                tag_label = k + u": " + val
+                                                if val!="":
+                                                    if not tag_label in tags_label_id:
+                                                        tags_label_id[tag_label] = generate_uuid()
+                                                        tags_id_label[tags_label_id[tag_label]] = tag_label
+                                                        #logger.debug("CREATED")
+                                                        #logger.debug(tags_label_id[tag_label] + " = " + tags_id_label[tags_label_id[tag_label]])
+                                                        proj_data["tags"].append({"meta": { "dc:title": tag_label }, "id": tags_label_id[tag_label] })
+                                                    annot_tags.append({"id-ref": tags_label_id[tag_label] })
+                                        elif k=="commentaire" or k=="description":
+                                            desc = v
+                                        elif k=="titre":
+                                            title = v
+                                    # Build annotation with needed fields
+                                    proj_data["annotations"].append({
+                                        "content": {
+                                            "mimetype": "application/x-ldt-structured",
+                                            "description": desc,
+    #                                         "img": {
+    #                                             "src": ""
+    #                                         },
+                                            "title": title,
+    #                                        "polemics": [ ],
+                                        },
+                                        "begin": a["begin"],
+                                        "meta": {
+    #                                        "dc:contributor": "admin",
+                                            "id-ref": a["type"],
+                                            "dc:created": now,
+    #                                        "dc:modified": "2014-03-04T16:40:23.609971",
+                                            "dc:creator": username
+                                        },
+                                        "end": a["end"],
+                                        "tags": annot_tags,
+                                        "color": "16763904",
+                                        "media": ctt_id,
+                                        "id": a["id"]
+                                    })
+                            
+                            # The annotation-types
+                            proj_data["annotation-types"] = []
+                            at_ids = []
+                            for at in json_data["annotation_types"]:
+                                proj_data["annotation-types"].append({
+    #                                dc:contributor: "admin",
+                                    "dc:creator": username,
+                                    "dc:title": at["id"],
+                                    "id": at["id"],
+    #                                dc:created: "2014-03-04T14:51:13.907674",
+                                    "dc:description": ""
+    #                                dc:modified: "2014-03-04T14:51:13.907674"
+                                })
+                                at_ids.append({ "id-ref": at["id"] })
+                            # The list of annotation-types
+                            list_id = generate_uuid()
+                            proj_data["lists"] = [{
+                                "items": at_ids,
+                                "meta": {
+                                    "dc:creator": username,
+                                    "id-ref": ctt_id,
+                                    "dc:title": "SPEL",
+                                    "dc:description": ""
+                                },
+                                "id": list_id
+                            }]
+                            # The views for default display
+                            proj_data["views"] = [{
+                                "id": generate_uuid(),
+                                "contents": [ ctt_id ],
+                                "annotation_types": [atid["id-ref"] for atid in at_ids]
+                            }]
+                            
+                            serializr = CinelabSerializer()
+                            serializr.validate_cinelab_json(proj_data)
+                            ldt_xml = serializr.cinelab_to_ldt(proj_data)
+                            proj.ldt = lxml.etree.tostring(ldt_xml, pretty_print=True)
+                            #logger.debug(proj.ldt)
+                            proj.save()
+                            update_stat_content(content)
+                    else:
+                        print("Ignoring or not exist.")
+        """
+        # This is the end
+        print("This is the end.")
+        
\ No newline at end of file
author	cavaliet
	Mon, 03 Nov 2014 17:05:50 +0100
changeset 146	404a0085badf
parent 145	80aa2d85fd01
child 147	fe5e9fe9c29c
src/spel/management/commands/generate_demonstrateur_data.py		file \| annotate \| diff \| comparison \| revisions
web/static/media/thumbnails/groups/group_default_icon.png		file \| annotate \| diff \| comparison \| revisions
web/static/media/thumbnails/projects/project_default_icon.png		file \| annotate \| diff \| comparison \| revisions
web/static/media/thumbnails/users/user_default_icon.png		file \| annotate \| diff \| comparison \| revisions