src/spel/management/commands/loadspeldata.py
author cavaliet
Tue, 11 Mar 2014 14:42:13 +0100
changeset 14 00a8a44543ab
parent 13 b425f929987d
child 21 3fdccf07adaf
permissions -rwxr-xr-x
clean import and tag management

# -*- coding: utf-8 -*-
'''
Created on Mar 22, 2013

@author: tc
'''

from datetime import datetime
from dircache import listdir
from django.core.management.base import BaseCommand, CommandError
from genericpath import isdir, isfile
from ldt.api.ldt.serializers.cinelabserializer import CinelabSerializer
from ldt.ldt_utils.models import Media, Content, Project
from ldt.ldt_utils.stat import update_stat_content
from ldt.ldt_utils.utils import generate_uuid
from ldt.security import set_current_user, get_current_user_or_admin
from ldt.security.cache import cached_assign
from optparse import make_option
from os.path import join
import json
import lxml.etree
import os.path
import time

import logging
logger = logging.getLogger(__name__)


class Command(BaseCommand):
    '''
    Load medias, contents, project from json generated by ubicast.

    The command takes exactly one argument: a folder. Every sub-folder of it
    that holds a "cinelab.json" file is imported: a Media and a Content are
    created from the json, and the content's front project is rewritten with
    the tags, annotations, annotation types, list and view built from it.
    '''

    args = 'folder containing folders containing json files'
    help = 'Load medias, contents and project from json files generated by ubicast'

    def __safe_get(self, dict_arg, key, conv = lambda x: x, default= None):
        '''
        Return conv(value) for dict_arg[key] (or for default when the key is
        missing). When that value is falsy (None, 0, "", empty container),
        default is returned unconverted, so conv is never called on it.
        '''
        val = dict_arg.get(key, default)
        return conv(val) if val else default

    def __safe_decode(self, s):
        '''
        Decode a string trying utf8 first, then latin1, and finally utf8
        with replacement characters. Non-string values are returned as is.
        '''
        if not isinstance(s, basestring):
            return s
        # Only decoding/encoding failures trigger the fallbacks; anything else
        # (KeyboardInterrupt included) is left to propagate.
        try:
            return s.decode('utf8')
        except UnicodeError:
            try:
                return s.decode('latin1')
            except UnicodeError:
                return s.decode('utf8','replace')

    def __load_json(self, json_path):
        '''
        Return the parsed content of json_path, or None (after printing an
        error) when the file cannot be read or does not contain valid json.
        '''
        try:
            # "with" closes the handle even when json.load raises
            with open(json_path) as file_data:
                return json.load(file_data)
        except (IOError, OSError, ValueError):
            print("Error while parsing json file.")
            return None

    def __build_tags_and_annotations(self, json_data, ctt_id, username, now):
        '''
        Build the "tags" and "annotations" lists of the project data.

        Only annotations whose content["data"] is a dict are kept. In that
        dict, "titre" gives the title, "commentaire"/"description" give the
        description, and every other key gives comma separated tag values,
        each one becoming a tag labelled "<key>: <value>".

        Returns a (tags, annotations) tuple.
        '''
        tags = []
        annotations = []
        # tag label -> tag id, so that a label met twice reuses the same tag
        tags_label_id = {}
        for a in json_data["annotations"]:
            # "content": {  "data": { "modalites_sceniques": "costumes,décors",... } }
            if not ("content" in a and "data" in a["content"]
                    and isinstance(a["content"]["data"], dict)):
                continue
            annot_tags = []
            desc = ""
            title = ""
            for k, v in a["content"]["data"].items():
                if k == "titre":
                    title = v
                elif k == "commentaire" or k == "description":
                    desc = v
                else:
                    # Any other key holds comma separated tag values
                    for val in v.split(","):
                        val = val.strip()
                        if val == "":
                            continue
                        tag_label = k + u": " + val
                        if tag_label not in tags_label_id:
                            tags_label_id[tag_label] = generate_uuid()
                            tags.append({"meta": { "dc:title": tag_label }, "id": tags_label_id[tag_label] })
                        annot_tags.append({"id-ref": tags_label_id[tag_label] })
            # Build annotation with needed fields
            annotations.append({
                "content": {
                    "mimetype": "application/x-ldt-structured",
                    "description": desc,
                    "title": title,
                },
                "begin": a["begin"],
                "meta": {
                    "id-ref": a["type"],
                    "dc:created": now,
                    "dc:creator": username
                },
                "end": a["end"],
                "tags": annot_tags,
                "color": "16763904",
                "media": ctt_id,
                "id": a["id"]
            })
        return tags, annotations

    def __build_annotation_types(self, json_data, username):
        '''
        Build the "annotation-types" list of the project data.

        Returns an (annotation_types, at_ids) tuple, at_ids being the list of
        {"id-ref": ...} references to those types, in the same order.
        '''
        annotation_types = []
        at_ids = []
        for at in json_data["annotation_types"]:
            annotation_types.append({
                "dc:creator": username,
                "dc:title": at["id"],
                "id": at["id"],
                "dc:description": ""
            })
            at_ids.append({ "id-ref": at["id"] })
        return annotation_types, at_ids

    def __import_content(self, ctt_id, json_data):
        '''
        Create the media and the content named after the folder ctt_id, then
        fill the content's front project from json_data and save it.
        '''
        # Save media and content
        media, _ = Media.objects.get_or_create(src=ctt_id+"/original.mp4", duration=json_data["medias"][0]["meta"]["duration"])
        media.is_public = True
        content = Content.objects.create(iri_id=ctt_id,
                                         iriurl=ctt_id+u"/"+ctt_id+u".iri",
                                         media_obj=media,
                                         title=json_data["meta"]["dc:title"],
                                         duration=int(json_data["medias"][0]["meta"]["duration"]),
                                         content_creation_date = json_data["meta"]["dc:created"])
        content.is_public = True
        # Get content front projet
        proj = content.front_project
        username = proj.owner.username
        now = datetime.utcnow().isoformat()

        # Start data to send to api
        proj_data = {}
        proj_data["meta"] = {
            "id": proj.ldt_id,
            "dc:title": proj.title,
            "dc:creator": username,
            "dc:description": "description added",
            "dc:created": json_data["meta"]["dc:created"],
            "dc:modified": json_data["meta"]["dc:modified"],
            "dc:contributor": username,
        }
        proj_data["medias"] = [{"id": content.iri_id}]

        # The tags and annotations (main part)
        tags, annotations = self.__build_tags_and_annotations(json_data, ctt_id, username, now)
        proj_data["tags"] = tags
        proj_data["annotations"] = annotations

        # The annotation-types
        annotation_types, at_ids = self.__build_annotation_types(json_data, username)
        proj_data["annotation-types"] = annotation_types

        # The list of annotation-types
        proj_data["lists"] = [{
            "items": at_ids,
            "meta": {
                "dc:creator": username,
                "id-ref": ctt_id,
                "dc:title": "SPEL",
                "dc:description": ""
            },
            "id": generate_uuid()
        }]
        # The views for default display
        proj_data["views"] = [{
            "id": generate_uuid(),
            "contents": [ ctt_id ],
            "annotation_types": [atid["id-ref"] for atid in at_ids]
        }]

        # Validate the cinelab data, convert it to ldt xml and store it
        serializr = CinelabSerializer()
        serializr.validate_cinelab_json(proj_data)
        ldt_xml = serializr.cinelab_to_ldt(proj_data)
        proj.ldt = lxml.etree.tostring(ldt_xml, pretty_print=True)
        proj.save()
        update_stat_content(content)

    def handle(self, *args, **options):
        '''
        Entry point of the command.

        args must hold exactly one item, the path of the folder to parse.
        The user is asked for confirmation before anything is imported.

        Raises CommandError when the argument count is wrong or when the
        folder does not exist.
        '''
        # Test path
        if len(args) != 1:
            raise CommandError("The command has no argument or too much arguments. Only one is needed : the folder file path.")

        # Check that the folder to parse exists
        path = os.path.abspath(args[0])
        if not os.path.exists(path):
            raise CommandError("The folder does not exist.")

        confirm = raw_input("""
    If the database already contains datas, they will be overriden
    
    Do you want to continue ?

    Type 'y' to continue, or 'n' to quit: """)

        # Continue
        if confirm == "y":
            print("Parsing folder %s ..." % path)
            for f in listdir(path):
                json_path = join(path,f,"cinelab.json")
                if not isfile(json_path):
                    print("Ignoring or not exist %s ..." % json_path)
                    continue
                print("Parsing json file %s ..." % json_path)
                json_data = self.__load_json(json_path)
                # Unreadable, invalid or empty json: nothing to import
                if json_data:
                    self.__import_content(f, json_data)

        # This is the end
        print("This is the end. DO NOT FORGET TO RUN THE COMMAND 'REINDEX -P' TO GENERATE SEGMENTS AND TAGS.")