spel: src/spel/management/commands/loadoperadata.py@7111c4d04cbc


# -*- coding: utf-8 -*-
'''
Created on Mar 22, 2013

@author: tc
'''

from datetime import datetime
from dircache import listdir
from django.core.management.base import BaseCommand, CommandError
from genericpath import isdir, isfile
from ldt.api.ldt.serializers.cinelabserializer import CinelabSerializer
from ldt.ldt_utils.models import Media, Content, Project
from ldt.ldt_utils.stat import update_stat_content
from ldt.ldt_utils.utils import generate_uuid
from ldt.security import set_current_user, get_current_user_or_admin
from ldt.security.cache import cached_assign
from optparse import make_option
from os.path import join
import json
import lxml.etree
import os.path
import time

import logging
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    '''
    Load medias, contents and projects from the json files generated by ubicast.

    The folder given as argument is walked recursively. For each file named
    "cinelab.json", the name of the folder holding it is used as the content
    id: a Media and a Content are created, then the annotations found in the
    json are written into the ldt of the content's front project.
    '''

    args = 'folder containing folders containing json files'
    help = 'Load medias, contents and project from json files generated by ubicast for OPERA contents'

    def __safe_get(self, dict_arg, key, conv=lambda x: x, default=None):
        '''
        Return conv(dict_arg[key]), or default when the value is missing or falsy.

        Note that any falsy value (0, "", empty list...) is handled like a
        missing key: conv is not called and default is returned.
        '''
        val = dict_arg.get(key, default)
        return conv(val) if val else default

    def __safe_decode(self, s):
        '''
        Decode s as utf8, then as latin1, then as utf8 with replacement.

        Values that are not basestring instances are returned unchanged.
        '''
        if not isinstance(s, basestring):
            return s
        # Only encoding problems are expected here: do not hide any other
        # error (or a KeyboardInterrupt) behind a bare except.
        for encoding in ('utf8', 'latin1'):
            try:
                return s.decode(encoding)
            except UnicodeError:
                continue
        return s.decode('utf8', 'replace')

    def _load_cinelab_json(self, json_path):
        '''
        Parse the json file at json_path.

        Returns the parsed data, or None when the file can not be read or
        is not valid json (a message is printed in that case).
        '''
        try:
            # The with statement closes the file even if json.load fails.
            with open(json_path) as file_data:
                return json.load(file_data)
        except (IOError, OSError, ValueError) as e:
            print("Error while parsing json file.")
            print("    %s" % e)
            return None

    def _parse_annotation_data(self, data, tags_label_id, proj_tags):
        '''
        Turn the "data" dict of one source annotation into tags, description and title.

        data          -- dict like { "modalites_sceniques": "costumes,décors", ... }
        tags_label_id -- dict mapping a tag label to its id, shared by all the
                         annotations of a project; updated in place.
        proj_tags     -- list of the project tags; a new entry is appended
                         the first time a label is met.

        Returns a (annot_tags, desc, title) tuple where annot_tags is the
        list of {"id-ref": tag_id} of the annotation.

        Opera management: every key except "commentaire", "description" and
        "titre" becomes a tag prefixed by "opera_" ("mesure" becomes
        "opera_mesure", "acte" becomes "opera_acte", ...). Comma separated
        values give one tag per value.
        '''
        annot_tags = []
        desc = ""
        title = ""
        for k, v in data.items():
            if k == "titre":
                title = v
            elif k in ("commentaire", "description"):
                desc = v
            else:
                if k in ("acte", "scene"):
                    # Normalise numeric values. A non numeric string (for
                    # instance an empty one) is kept as is instead of
                    # aborting the whole import: empty values are skipped
                    # below anyway.
                    try:
                        v = int(v)
                    except ValueError:
                        pass
                prefix = "opera_" + k
                for val in unicode(v).split(","):
                    val = val.strip()
                    if val == "":
                        continue
                    tag_label = prefix + u": " + val
                    if tag_label not in tags_label_id:
                        tag_id = generate_uuid()
                        tags_label_id[tag_label] = tag_id
                        proj_tags.append({"meta": {"dc:title": tag_label}, "id": tag_id})
                    annot_tags.append({"id-ref": tags_label_id[tag_label]})
        return annot_tags, desc, title

    def _import_content(self, ctt_id, json_data):
        '''
        Create the media and the content named ctt_id and fill its front project.

        ctt_id    -- content id (name of the folder holding the json file).
        json_data -- parsed content of the "cinelab.json" file.

        A missing key in json_data raises KeyError.
        '''
        dur = int(json_data["medias"][0]["meta"]["duration"])
        # Save media and content
        media, _ = Media.objects.get_or_create(src="/data/opera/" + ctt_id + "/original_web.mp4", duration=dur)
        media.is_public = True
        content = Content.objects.create(iri_id=ctt_id,
                                         iriurl=ctt_id + u"/" + ctt_id + u".iri",
                                         media_obj=media,
                                         title=json_data["meta"]["ucm:oeuvre"] + " " + json_data["meta"]["dc:created"][:10],
                                         duration=dur,
                                         content_creation_date=json_data["meta"]["dc:created"])
        content.is_public = True
        content.tags.add("content_opera")
        # Get content front project
        proj = content.front_project
        username = proj.owner.username
        now = datetime.utcnow().isoformat()
        # Start data to send to api
        proj_data = {
            "meta": {
                "id": proj.ldt_id,
                "dc:title": proj.title,
                "dc:creator": username,
                "dc:description": "description added",
                "dc:created": json_data["meta"]["dc:created"],
                "dc:modified": json_data["meta"]["dc:modified"],
                "dc:contributor": username,
            },
            "medias": [{"id": content.iri_id}],
            "tags": [],
            "annotations": [],
        }
        # The tags and annotations (main part)
        tags_label_id = {}
        for a in json_data["annotations"]:
            # Only annotations shaped like
            # "content": { "data": { "modalites_sceniques": "costumes,décors",... } }
            # are imported.
            if not ("content" in a and "data" in a["content"] and isinstance(a["content"]["data"], dict)):
                continue
            annot_tags, desc, title = self._parse_annotation_data(a["content"]["data"], tags_label_id, proj_data["tags"])
            # Build annotation with needed fields
            proj_data["annotations"].append({
                "content": {
                    "mimetype": "application/x-ldt-structured",
                    "description": desc,
                    "title": title,
                },
                "begin": a["begin"],
                "meta": {
                    "id-ref": a["type"],
                    "dc:created": now,
                    "dc:creator": username
                },
                "end": a["end"],
                "tags": annot_tags,
                "color": "16763904",
                "media": ctt_id,
                "id": a["id"]
            })

        # The annotation-types
        at_names = [at["id"] for at in json_data["annotation_types"]]
        proj_data["annotation-types"] = [{
            "dc:creator": username,
            "dc:title": at_name,
            "id": at_name,
            "dc:description": ""
        } for at_name in at_names]
        # The list of annotation-types
        proj_data["lists"] = [{
            "items": [{"id-ref": at_name} for at_name in at_names],
            "meta": {
                "dc:creator": username,
                "id-ref": ctt_id,
                "dc:title": "SPEL",
                "dc:description": ""
            },
            "id": generate_uuid()
        }]
        # The views for default display
        proj_data["views"] = [{
            "id": generate_uuid(),
            "contents": [ctt_id],
            "annotation_types": list(at_names)
        }]

        serializr = CinelabSerializer()
        serializr.validate_cinelab_json(proj_data)
        ldt_xml = serializr.cinelab_to_ldt(proj_data)
        proj.ldt = lxml.etree.tostring(ldt_xml, pretty_print=True)
        proj.save()
        update_stat_content(content)

    def handle(self, *args, **options):
        '''
        Entry point of the command.

        args must hold exactly one item, the path of the folder to import.
        Raises CommandError when the argument count is wrong or when the
        folder does not exist. Nothing is imported unless the user answers
        "y" to the confirmation prompt.
        '''
        # Test path
        if len(args) != 1:
            raise CommandError("The command has no argument or too much arguments. Only one is needed : the folder file path.")

        path = os.path.abspath(args[0])
        if not os.path.exists(path):
            raise CommandError("The folder does not exist.")

        confirm = raw_input(
            "\n"
            "    If the database already contains datas, they will be overriden\n"
            "    \n"
            "    Do you want to continue ?\n"
            "\n"
            "    Type 'y' to continue, or 'n' to quit: ")

        # Continue
        if confirm == "y":
            print("Parsing folder %s ..." % path)
            for dirpath, _dirnames, filenames in os.walk(path):
                for filename in filenames:
                    if filename != "cinelab.json":
                        print("Ignoring or not exist.")
                        continue
                    json_path = os.path.join(dirpath, filename)
                    print("json_path %s" % json_path)
                    print("Parsing json file %s ..." % json_path)
                    json_data = self._load_cinelab_json(json_path)
                    if json_data:
                        # The folder holding the json file names the content.
                        self._import_content(os.path.basename(dirpath), json_data)

        # This is the end
        print("This is the end. DO NOT FORGET TO RUN THE COMMAND 'REINDEX -P' TO GENERATE SEGMENTS AND TAGS.")
author	cavaliet
	Thu, 15 May 2014 15:28:39 +0200
changeset 88	7111c4d04cbc
parent 84	d58673e1dc90
child 104	cc1cf7e38c83
permissions	-rwxr-xr-x