| author | cavaliet |
| Tue, 08 Jul 2014 17:50:11 +0200 | |
| changeset 115 | 231c0319666c |
| parent 108 | 878a2c621fcf |
| permissions | -rwxr-xr-x |
| 84 | 1 |
# -*- coding: utf-8 -*- |
2 |
''' |
|
3 |
Created on Mar 22, 2013 |
|
4 |
||
5 |
@author: tc |
|
6 |
''' |
|
7 |
||
8 |
from datetime import datetime |
|
9 |
from dircache import listdir |
|
10 |
from django.core.management.base import BaseCommand, CommandError |
|
11 |
from genericpath import isdir, isfile |
|
12 |
from ldt.api.ldt.serializers.cinelabserializer import CinelabSerializer |
|
13 |
from ldt.ldt_utils.models import Media, Content, Project |
|
14 |
from ldt.ldt_utils.stat import update_stat_content |
|
15 |
from ldt.ldt_utils.utils import generate_uuid |
|
16 |
from ldt.security import set_current_user, get_current_user_or_admin |
|
17 |
from ldt.security.cache import cached_assign |
|
18 |
from optparse import make_option |
|
19 |
from os.path import join |
|
20 |
import json |
|
21 |
import lxml.etree |
|
22 |
import os.path |
|
23 |
import time |
|
24 |
||
25 |
import logging |
|
| 108 | 26 |
from string import replace |
| 84 | 27 |
logger = logging.getLogger(__name__) |
28 |
||
29 |
||
30 |
class Command(BaseCommand):
    '''
    Load medias, contents and projects from the json files generated by ubicast.

    The folder given as argument is walked recursively. For every
    "cinelab_iri.json" file found, a Media and a Content named after the
    containing directory are saved, then the content's front project is filled
    with the annotations, tags and annotation types read from the json file.
    '''

    args = 'folder containing folders containing json files'
    help = 'Load medias, contents and project from json files generated by ubicast for OPERA contents'

    def __safe_get(self, dict_arg, key, conv=lambda x: x, default=None):
        '''
        Return ``conv(dict_arg[key])``, or ``default`` when there is no usable value.

        Any falsy value (None, 0, "", empty container) is treated as missing
        and yields ``default`` untouched. Note that when the key is absent and
        ``default`` is truthy, ``default`` itself is passed through ``conv``.
        '''
        val = dict_arg.get(key, default)
        return conv(val) if val else default

    def __safe_decode(self, s):
        '''
        Decode a byte string to unicode, trying utf8 first and latin1 second.

        Anything that is not a byte string (already-decoded text, numbers,
        None, ...) is returned unchanged.
        '''
        # ``bytes`` is an alias of ``str`` on Python 2. Unicode objects must be
        # passed through as is: calling .decode() on them triggers an implicit
        # ASCII *encode* first, which raises UnicodeEncodeError on any
        # non-ASCII character.
        if not isinstance(s, bytes):
            return s
        try:
            return s.decode('utf8')
        except UnicodeDecodeError:
            # latin1 maps every possible byte to a character, so this cannot
            # fail and no further fallback is needed.
            return s.decode('latin1')

    def handle(self, *args, **options):
        '''
        Entry point of the command.

        Expects exactly one positional argument: the folder to walk. Asks for
        an interactive confirmation before touching the database.

        Raises CommandError when the argument count is wrong or when the
        folder does not exist.
        '''
        # Test path
        if len(args) != 1:
            raise CommandError("The command has no argument or too much arguments. Only one is needed : the folder file path.")

        path = os.path.abspath(args[0])
        # A plain file would walk to nothing, so require an actual directory.
        if not os.path.isdir(path):
            raise CommandError("The folder does not exist.")

        confirm = raw_input("""
    If the database already contains datas, they will be overriden

    Do you want to continue ?

    Type 'y' to continue, or 'n' to quit: """)
        if confirm != "y":
            return

        print("Parsing folder %s ..." % path)
        for dirpath, _dirnames, filenames in os.walk(path):
            for filename in filenames:
                if filename != "cinelab_iri.json":
                    print("Ignoring or not exist.")
                    continue
                json_path = os.path.join(dirpath, filename)
                print("Parsing json file %s ..." % json_path)
                json_data = self.__load_json(json_path)
                if not json_data:
                    continue
                # The directory name is used as the content identifier.
                self.__import_content(os.path.basename(dirpath), json_data)

        # This is the end
        print("This is the end. DO NOT FORGET TO RUN THE COMMAND 'REINDEX -P' TO GENERATE SEGMENTS AND TAGS.")

    def __load_json(self, json_path):
        '''
        Return the parsed content of json_path, or None when it cannot be read or parsed.
        '''
        try:
            # The context manager closes the file even when parsing fails.
            with open(json_path) as file_data:
                return json.load(file_data)
        except (IOError, ValueError):
            print("Error while parsing json file.")
            logger.exception("Could not load json file %s", json_path)
            return None

    def __import_content(self, ctt_id, json_data):
        '''
        Save the media and content identified by ctt_id and fill the content's front project.
        '''
        # dirname is like "acte-1-stuff-thing_1234" and we want "Acte 1 stuff thing"
        content_title = ctt_id.split("_")[0].replace("-", " ").capitalize()
        dur = int(json_data["medias"][0]["meta"]["duration"])

        # Save media and content
        media, _ = Media.objects.get_or_create(src="/data/opera/" + ctt_id + "/720p.mp4", duration=dur)
        media.is_public = True
        content = Content.objects.create(iri_id=ctt_id,
                                         iriurl=ctt_id + u"/" + ctt_id + u".iri",
                                         media_obj=media,
                                         title=content_title,
                                         duration=dur,
                                         content_creation_date=json_data["meta"]["dc:created"])
        content.is_public = True
        content.tags.add("content_opera")

        # Get content front project and replace its ldt with the imported data
        proj = content.front_project
        proj_data = self.__build_project_data(proj, content, ctt_id, json_data)

        serializr = CinelabSerializer()
        serializr.validate_cinelab_json(proj_data)
        ldt_xml = serializr.cinelab_to_ldt(proj_data)
        proj.ldt = lxml.etree.tostring(ldt_xml, pretty_print=True)
        proj.save()
        update_stat_content(content)

    def __build_project_data(self, proj, content, ctt_id, json_data):
        '''
        Build the cinelab dict (meta, medias, tags, annotations, annotation-types,
        lists and views) describing proj from the ubicast json_data.
        '''
        username = proj.owner.username
        now = datetime.utcnow().isoformat()

        proj_data = {
            "meta": {
                "id": proj.ldt_id,
                "dc:title": proj.title,
                "dc:creator": username,
                "dc:description": "description added",
                "dc:created": json_data["meta"]["dc:created"],
                "dc:modified": json_data["meta"]["dc:modified"],
                "dc:contributor": username,
            },
            "medias": [{"id": content.iri_id}],
            "tags": [],
            "annotations": [],
            "annotation-types": [],
        }

        # The tags and annotations (main part).
        # Shared by all annotations so that a given label gets a single tag id.
        tags_label_id = {}
        for a in json_data["annotations"]:
            # Expected shape:
            # "content": { "data": { "modalites_sceniques": "costumes,décors",... } }
            # Annotations without such a data dict are skipped.
            annot_content = a.get("content")
            if not isinstance(annot_content, dict):
                continue
            data = annot_content.get("data")
            if not isinstance(data, dict):
                continue

            annot_tags = []
            desc = ""
            title = ""
            for k, v in data.iteritems():
                if k == "commentaire" or k == "description":
                    desc = v
                elif k == "titre":
                    title = v
                else:
                    # Every other key becomes one or several "opera_<key>" tags.
                    annot_tags.extend(self.__collect_tags(k, v, tags_label_id, proj_data["tags"]))

            # Build annotation with needed fields
            proj_data["annotations"].append({
                "content": {
                    "mimetype": "application/x-ldt-structured",
                    "description": desc,
                    "title": title,
                },
                "begin": a["begin"],
                "meta": {
                    "id-ref": a["type"],
                    "dc:created": now,
                    "dc:creator": username,
                },
                "end": a["end"],
                "tags": annot_tags,
                "color": "16763904",
                "media": ctt_id,
                "id": a["id"],
            })

        # The annotation-types
        at_ids = []
        for at in json_data["annotation_types"]:
            proj_data["annotation-types"].append({
                "dc:creator": username,
                "dc:title": at["id"],
                "id": at["id"],
                "dc:description": "",
            })
            at_ids.append({"id-ref": at["id"]})

        # The list of annotation-types
        proj_data["lists"] = [{
            "items": at_ids,
            "meta": {
                "dc:creator": username,
                "id-ref": ctt_id,
                "dc:title": "SPEL",
                "dc:description": "",
            },
            "id": generate_uuid(),
        }]

        # The views for default display
        proj_data["views"] = [{
            "id": generate_uuid(),
            "contents": [ctt_id],
            "annotation_types": [atid["id-ref"] for atid in at_ids],
        }]
        return proj_data

    def __collect_tags(self, key, value, tags_label_id, project_tags):
        '''
        Turn one entry of an annotation's data dict into tag references.

        value is split on commas; each non-empty item gives the label
        "opera_<key>: <item>" (tag "mesure" becomes "opera_mesure", tag "acte"
        becomes "opera_acte", ...). A label seen for the first time gets a new
        id stored in tags_label_id and a tag entry appended to project_tags.
        Returns the list of {"id-ref": ...} dicts for the annotation.
        '''
        if key == "acte" or key == "scene":
            # These two are normalised through int(). A value that is not a
            # single integer is kept as is rather than aborting an import
            # whose content has already been saved.
            try:
                value = int(value)
            except (TypeError, ValueError):
                pass
        prefix = "opera_" + key

        refs = []
        for val in unicode(value).split(","):
            val = val.strip()
            if val == "":
                continue
            tag_label = prefix + u": " + val
            if tag_label not in tags_label_id:
                tags_label_id[tag_label] = generate_uuid()
                project_tags.append({"meta": {"dc:title": tag_label}, "id": tags_label_id[tag_label]})
            refs.append({"id-ref": tags_label_id[tag_label]})
        return refs
|
233 |