# -*- coding: utf-8 -*-
# Extracted from HG changeset 404a0085badf23e46ec560382118b479d703c0b6
# (parent 80aa2d85fd01c2e3c6660f05b6df3a21c88717a7, user cavaliet,
# Mon Nov 03 17:05:50 2014 +0100) : "command to prepare datas for demonstrateur".
# Target path : src/spel/management/commands/generate_demonstrateur_data.py
# The same changeset also lists, without content, the default thumbnails
# web/static/media/thumbnails/{groups/group,projects/project,users/user}_default_icon.png
'''
Created on Mar 22, 2013

@author: tc
'''

from datetime import datetime
from django.conf import settings
from django.contrib.auth import get_user_model
from django.core.management.base import BaseCommand, CommandError
from ldt.api.ldt.serializers.cinelabserializer import CinelabSerializer
from ldt.ldt_utils.models import Media, Content, Project
from ldt.ldt_utils.projectserializer import ProjectJsonSerializer
from ldt.ldt_utils.stat import update_stat_content
from ldt.security import set_current_user
import copy
import csv
import json
import lxml.etree

import logging
logger = logging.getLogger(__name__)

User = get_user_model()


class Command(BaseCommand):
    '''
    Create the "demonstrateur" contents described by a csv file.

    For every complete csv row, a Media and a Content are fetched or created
    for the extract, the selected annotations of the source content's front
    project are copied into the extract's front project with their timecodes
    shifted by the extract start, and the project is saved as ldt xml.
    '''

    args = 'csv data file'
    help = 'generates json files for demonstrateur contents and project'

    # Indexes of the csv cells read by the command.
    _COL_CONTENT_ID = 1
    _COL_PRE_ANNOT_ID = 2
    _COL_PRE_ANNOT_TCIN_MS = 4
    _COL_ANNOT_ID = 7
    _COL_ANNOT_TCIN_MS = 11
    _COL_POST_ANNOT_ID = 14
    _COL_CREATED_CTT_ID = 19
    _COL_CREATED_TCIN_MS = 21
    _COL_CREATED_CTT_DUR = 25

    # Hand-made timecode corrections, keyed by (annotation id, media id).
    # Each value holds the annotation fields to overwrite.
    _TIMECODE_EXCEPTIONS = {
        ("a17", "travail-sur-lacte-1-jour-15_b822f8"): {"end": 2723000},
        ("a86", "travail-sur-lacte-3-et-lacte-1-jour-22_4c3a75"): {"begin": 20925000, "end": 21290000},
        ("a1", "premier-bout-a-bout-de-la-piece-jour-23_73654f"): {"end": 728000},
        ("a1", "bout-a-bout-avec-les-enfants_a4c293_SON_SATURE"): {"end": 560000},
        ("a1", "filage-technique-jour-40-deuxieme-partie_58b2a6_SON_GRESILLE"): {"end": 560000},
        ("a105", "travail-sur-lacte-2-jour-18_5e9fe9"): {"begin": 19628000},
        ("a117", "travail-sur-lacte-2-jour-18_5e9fe9"): {"end": 22080000},
    }

    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
        '''
        Return conv(dict_arg[key]), or default when the value is missing or falsy.
        '''
        val = dict_arg.get(key, default)
        return conv(val) if val else default

    def __safe_decode(self, s):
        '''
        Decode a string as utf8, then latin1, then utf8 with replacement.

        Values that are not strings are returned untouched.
        '''
        if not isinstance(s, basestring):
            return s
        try:
            return s.decode('utf8')
        except UnicodeError:
            try:
                return s.decode('latin1')
            except UnicodeError:
                return s.decode('utf8', 'replace')

    def apply_exceptions(self, a):
        '''
        Overwrite in place the begin/end of the annotations listed in
        _TIMECODE_EXCEPTIONS. Other annotations are left untouched.

        a is an annotation dict with at least the "id" and "media" keys.
        '''
        overrides = self._TIMECODE_EXCEPTIONS.get((a["id"], a["media"]))
        if overrides:
            a.update(overrides)

    def _cell_to_int(self, line, index):
        '''
        Return the csv cell as an int, or None when it is absent or not a number.
        '''
        try:
            return int(line[index])
        except (ValueError, IndexError):
            return None

    def _parse_row(self, line):
        '''
        Extract the useful cells of a csv row.

        Returns a dict, or None when the row can not describe an extract :
        row too short, empty content id, empty created content id, or created
        timecode / duration missing, not numeric or equal to 0.
        '''
        # A blank or truncated row has no created content id cell.
        if len(line) <= self._COL_CREATED_CTT_ID:
            return None
        created_tcin_ms = self._cell_to_int(line, self._COL_CREATED_TCIN_MS)
        created_ctt_dur = self._cell_to_int(line, self._COL_CREATED_CTT_DUR)
        content_id = line[self._COL_CONTENT_ID]
        created_ctt_id = line[self._COL_CREATED_CTT_ID]
        # NOTE(review): a created timecode of 0 is rejected like a missing one,
        # as the command always did - confirm that no extract starts at 0 ms.
        if content_id == "" or created_ctt_id == "" or not created_tcin_ms or not created_ctt_dur:
            return None
        return {
            "content_id": content_id,
            "pre_annot_id": line[self._COL_PRE_ANNOT_ID],
            "pre_annot_tcin_ms": line[self._COL_PRE_ANNOT_TCIN_MS],
            "annot_id": line[self._COL_ANNOT_ID],
            "annot_tcin_ms": line[self._COL_ANNOT_TCIN_MS],
            "post_annot_id": line[self._COL_POST_ANNOT_ID],
            "created_ctt_id": created_ctt_id,
            "created_tcin_ms": created_tcin_ms,
            "created_ctt_dur": created_ctt_dur,
        }

    def _load_source_project(self, iri_id):
        '''
        Return the cinelab data of the front project of the content iri_id.

        Raises CommandError when no content has this iri_id.
        '''
        print("get content iri_id = " + iri_id)
        try:
            ctt_source = Content.objects.filter(iri_id = iri_id).select_related("front_project")[0]
        except IndexError:
            raise CommandError("No content found with iri_id = " + iri_id)
        serializer = ProjectJsonSerializer(ctt_source.front_project)
        print("project ldt_id = " + ctt_source.front_project.ldt_id)
        return serializer.serialize_to_cinelab()

    def _create_extract(self, row, count_content, project_source_data):
        '''
        Fetch or create the media and content of one extract and fill its
        front project from the source project data.

        row is a dict built by _parse_row, count_content the rank of the
        extract inside its source content (used in the title), and
        project_source_data the cinelab data of the source project.
        '''
        content_id = row["content_id"]
        created_ctt_id = row["created_ctt_id"]
        created_tcin_ms = row["created_tcin_ms"]
        created_ctt_dur = row["created_ctt_dur"]

        # Timecode taken from the csv file, only used in the diagnostic message below.
        if row["pre_annot_id"] != "":
            tc_offset = int(row["pre_annot_tcin_ms"])
        else:
            tc_offset = int(row["annot_tcin_ms"])

        # Data to create the new media and content
        ctt_title = content_id + (u" 0" if count_content < 10 else u" ") + unicode(count_content)
        media_src = "/data/demonstrateur/" + content_id + "/" + created_ctt_id + ".mp4"
        domain = getattr(settings, "DEMONSTRATEUR_DOMAIN", None)
        if domain:
            media_src = domain + media_src
        media, _ = Media.objects.get_or_create(src=media_src, duration=created_ctt_dur)
        media.is_public = True
        content, _ = Content.objects.get_or_create(iri_id = created_ctt_id,
                                                   iriurl = created_ctt_id + u"/" + created_ctt_id + u".iri",
                                                   media_obj = media,
                                                   title = ctt_title,
                                                   duration = created_ctt_dur)
        content.is_public = True
        update_stat_content(content)

        # Get content front project
        proj = content.front_project
        proj_data = ProjectJsonSerializer(proj).serialize_to_cinelab()

        # The annotations : only the pre, main and post ones, moved to the extract timeline
        wanted_ids = (row["pre_annot_id"], row["annot_id"], row["post_annot_id"])
        proj_data["annotations"] = []
        for a in project_source_data["annotations"]:
            if a["id"] not in wanted_ids:
                continue
            self.apply_exceptions(a)
            shifted = copy.deepcopy(a)
            source_begin = shifted["begin"]
            shifted["begin"] = source_begin - created_tcin_ms
            shifted["end"] = shifted["end"] - created_tcin_ms
            shifted["media"] = created_ctt_id
            # Out of range annotations are kept, but reported
            if shifted["begin"] < 0:
                print(created_ctt_id + " : " + shifted["id"] + " : < 0 : serveur : " + str(source_begin) + ", excel : " + str(tc_offset) + ", created_tcin_ms : " + str(created_tcin_ms))
            if shifted["end"] > created_ctt_dur:
                print(created_ctt_id + " : " + shifted["id"] + " : > " + str(created_ctt_dur) + " : " + str(shifted["end"]))
            proj_data["annotations"].append(shifted)

        # The annotation-types
        for at in project_source_data["annotation-types"]:
            proj_data["annotation-types"].append(at)
            proj_data["views"][0]["annotation_types"].append(at["id"])
            proj_data["lists"][0]["items"].append({ "id-ref": at["id"] })
        # The tags
        proj_data["tags"] = project_source_data["tags"]

        serializr = CinelabSerializer()
        serializr.validate_cinelab_json(proj_data)
        ldt_xml = serializr.cinelab_to_ldt(proj_data)
        proj.ldt = lxml.etree.tostring(ldt_xml, pretty_print=True)
        proj.save()
        update_stat_content(content)

        print(ctt_title + " : " + created_ctt_id)

    def handle(self, *args, **options):
        '''
        Entry point of the command. The only argument is the path of the csv file.

        Raises CommandError when the argument count is wrong, when there is
        no superuser, or when a source content of the csv file does not exist.
        '''
        # Test path
        if len(args) != 1:
            raise CommandError("The command has no argument or too much arguments. Only one is needed : the csv file.")

        with open(args[0], 'rU') as datafile:
            lines = list(csv.reader(datafile))

        try:
            admin = User.objects.filter(is_superuser=True)[0]
        except IndexError:
            raise CommandError("No superuser found : at least one is needed to run this command.")
        set_current_user(admin)

        last_content_id = ""
        count_content = 0
        project_source_data = None

        for line in lines:
            row = self._parse_row(line)
            if row is None:
                # NOTE(review): the indentation of the original file was lost;
                # "Nope" is assumed to be the answer to an incomplete row.
                print("Nope")
                continue

            # The source project is only loaded again when the source content changes
            if last_content_id != row["content_id"]:
                last_content_id = row["content_id"]
                count_content = 0
                project_source_data = self._load_source_project(last_content_id)

            if row["annot_id"] != "" and project_source_data:
                count_content += 1
                self._create_extract(row, count_content, project_source_data)

        # This is the end
        print("This is the end.")