|
1
|
1 |
from datetime import datetime |
|
|
2 |
from django.utils.datastructures import SortedDict |
|
|
3 |
from ldt.ldt_utils.models import Content, Project |
|
|
4 |
import logging |
|
|
5 |
import lxml.etree |
|
|
6 |
import uuid |
|
|
7 |
|
|
|
8 |
# strptime() patterns accepted for a date string: day/month/year with
# slashes first, then year-month-day with dashes.
DATE_FORMATS = [
    "%d/%m/%Y",
    "%Y-%m-%d",
]
|
|
9 |
|
|
|
10 |
""" |
|
|
11 |
Serialize a project object to a cinelab compatible array |
|
|
12 |
""" |
|
|
13 |
class ProjectSerializer:
    """Serialize a project object to a cinelab compatible structure.

    The project's ``ldt`` XML document is parsed on the first call to
    :meth:`serialize_to_cinelab` or :meth:`getAnnotations`; the parsed
    medias, lists, tags, views, annotation types and annotations are kept
    on the instance.

    Arguments:
    project -- object exposing the ``ldt`` XML text and the metadata read by
        :meth:`serialize_to_cinelab` (``ldt_id``, ``title``, dates, ...).
    from_contents -- when true, the ensembles found in each content's own
        iri file are serialized in addition to those of the project.
    from_display -- when true, only the contents, cuttings and ensembles
        referenced by one display are serialized. A string selects the
        display with that id; any other true value selects the first one.
    """

    # Text types of the running interpreter (str and unicode on Python 2,
    # str twice on Python 3); used to tell a display id from a plain flag.
    _STRING_TYPES = (type(""), type(u""))

    def __init__(self, project, from_contents=True, from_display=True):
        self.project = project
        self.parsed = False
        self.ldt_doc = None
        self.medias_dict = SortedDict()
        self.annotations_dict = SortedDict()
        self.annotations_by_annotation_types = {}
        # tags indexed by title; tags_dict holds the same objects by id
        self.tags = {}
        self.tags_dict = SortedDict()
        self.annotation_types_dict = SortedDict()
        self.views_dict = SortedDict()
        self.lists_dict = SortedDict()
        self.serialize_contents = from_contents
        self.from_display = from_display
        # ids collected from the selected display(s), in document order
        self.display_contents_list = []
        self.display_cuttings_list = []
        self.display_ensemble_list = []

    def __parse_views(self, display_node_list):
        """Register one view per display node and collect the ids it uses.

        Display nodes without an ``id`` attribute are ignored. Content,
        cutting (``decoupage``) and ensemble (``idens``) ids are appended
        once each to the instance-wide ``display_*_list`` attributes.
        """
        for display_node in display_node_list:
            display_id = display_node.get(u"id", None)
            if not display_id:
                continue
            content_list = []
            cuttings_list = []
            new_display = {
                "id": display_id,
                "contents": content_list,
                "annotation_types": cuttings_list,
            }

            for content_node in display_node.xpath("content"):
                content_id = content_node.get("id")
                if content_id not in content_list:
                    content_list.append(content_id)
                if content_id not in self.display_contents_list:
                    self.display_contents_list.append(content_id)
                for cutting_node in content_node.xpath("decoupage"):
                    cutting_id = cutting_node.get("id")
                    if cutting_id not in cuttings_list:
                        cuttings_list.append(cutting_id)
                    if cutting_id not in self.display_cuttings_list:
                        self.display_cuttings_list.append(cutting_id)
                    ensemble_id = cutting_node.get("idens")
                    if ensemble_id not in self.display_ensemble_list:
                        self.display_ensemble_list.append(ensemble_id)
            self.views_dict[display_id] = new_display

    @staticmethod
    def _joined_text(node, child_tag):
        """Return the concatenated text nodes of the ``child_tag`` children."""
        return "".join(node.xpath(child_tag + "/text()", smart_strings=False))

    @staticmethod
    def _parse_created_date(date_str):
        """Return ``date_str`` as an ISO string, or the current UTC time.

        Each pattern of DATE_FORMATS is tried in turn; the current UTC time
        is used when the string is empty or matches none of them.
        """
        if date_str:
            for date_format in DATE_FORMATS:
                try:
                    return datetime.strptime(date_str, date_format).isoformat()
                except ValueError:
                    # not this pattern, try the next one
                    continue
        return datetime.utcnow().isoformat()

    @staticmethod
    def _element_tag_titles(element_node):
        """Return the tag titles of an element node as a list.

        Sources, in order of precedence: the comma separated ``tags``
        attribute, the ``tag`` children, then the ``tags/tag`` children.
        """
        tags_attr = element_node.get(u"tags", u"")
        if tags_attr:
            return [title.strip() for title in tags_attr.split(",")]
        titles = list(element_node.xpath("tag/text()", smart_strings=False))
        if not titles:
            titles = list(
                element_node.xpath("tags/tag/text()", smart_strings=False))
        return titles

    def _get_or_create_tag_id(self, tag_title, tag_date):
        """Return the id of the tag named ``tag_title``, creating it if new.

        A title that is already registered yields the id of the existing
        tag, so every annotation using that title references the same tag.
        ``tag_date`` is only used for the metadata of a newly created tag.
        """
        known_tag = self.tags.get(tag_title)
        if known_tag is not None:
            return known_tag["id"]

        # u"%s" gives a text string on both Python 2 and Python 3
        tag_id = u"%s" % uuid.uuid1()
        new_tag = {
            "id": tag_id,
            "meta": {
                "dc:creator": "IRI",
                "dc:created": tag_date,
                "dc:contributor": "IRI",
                "dc:modified": tag_date,
                "dc:title": tag_title,
            },
        }
        self.tags[tag_title] = new_tag
        self.tags_dict[tag_id] = new_tag
        return tag_id

    def _build_annotation(self, element_node, media_id, decoupage_id,
                          decoupage_creator, decoupage_created):
        """Build the annotation dict of one ``element`` node.

        The tags used by the element are registered on the instance as a
        side effect. ``begin`` is kept as read from the node while ``end``
        is the integer sum of ``begin`` and ``dur``.
        """
        element_begin = element_node.attrib[u"begin"]
        element_duration = element_node.attrib[u"dur"]

        element_audio_src = ""
        element_audio_href = ""
        audio_nodes = element_node.xpath("audio")
        if len(audio_nodes) > 0:
            element_audio_src = audio_nodes[0].get(u"source", u"")
            element_audio_href = audio_nodes[0].text

        tag_date = datetime.utcnow().isoformat()
        element_tags = [
            {"id-ref": self._get_or_create_tag_id(tag_title, tag_date)}
            for tag_title in self._element_tag_titles(element_node)
        ]

        return {
            "begin": element_begin,
            "end": int(element_begin) + int(element_duration),
            "id": element_node.attrib[u"id"],
            "media": media_id,
            "content": {
                "mimetype": "application/x-ldt-structured",
                "title": self._joined_text(element_node, "title"),
                "description": self._joined_text(element_node, "abstract"),
                "color": element_node.attrib[u"color"],
                "audio": {
                    "src": element_audio_src,
                    "mimetype": "audio/mp3",
                    "href": element_audio_href,
                },
            },
            # an element without tags is serialized with None, not []
            "tags": element_tags or None,
            "meta": {
                "id-ref": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:contributor": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:modified": decoupage_created,
            },
        }

    def __parse_ensemble(self, ensemble_node, content):
        """Register the list, annotation types and annotations of an ensemble.

        Only ``decoupage`` children are considered; when ``from_display`` is
        set, those whose id is not in ``display_cuttings_list`` are skipped.
        Raises KeyError when a required XML attribute is missing.
        """
        ensemble_id = ensemble_node.attrib[u"id"]
        ensemble_author = ensemble_node.attrib[u"author"]
        ensemble_title = ensemble_node.attrib[u"title"]
        ensemble_description = ensemble_node.attrib[u"abstract"]
        ensemble_created = datetime.utcnow().isoformat()

        list_items = []
        new_list = {
            "id": ensemble_id,
            "items": list_items,
            "meta": {
                "dc:creator": ensemble_author,
                "dc:created": ensemble_created,
                "dc:contributor": "undefined",
                "dc:modified": ensemble_created,
                "dc:title": ensemble_title,
                "dc:description": ensemble_description,
                "id-ref": content.iri_id,
                "editable": "false",
            },
        }

        for decoupage_node in ensemble_node:
            if decoupage_node.tag != "decoupage":
                continue

            decoupage_id = decoupage_node.attrib[u"id"]
            if self.from_display and decoupage_id not in self.display_cuttings_list:
                continue

            decoupage_creator = decoupage_node.attrib[u"author"] or "IRI"
            decoupage_created = self._parse_created_date(
                decoupage_node.get(u"date"))

            list_items.append({"id-ref": decoupage_id})

            self.annotation_types_dict[decoupage_id] = {
                "id": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:contributor": decoupage_creator,
                "dc:modified": decoupage_created,
                "dc:title": self._joined_text(decoupage_node, "title"),
                "dc:description": self._joined_text(decoupage_node, "abstract"),
            }
            decoupage_annotations = []
            self.annotations_by_annotation_types[decoupage_id] = decoupage_annotations

            for element_node in decoupage_node.xpath("elements/element"):
                new_annotation = self._build_annotation(
                    element_node, content.iri_id, decoupage_id,
                    decoupage_creator, decoupage_created)
                self.annotations_dict[new_annotation["id"]] = new_annotation
                decoupage_annotations.append(new_annotation)

        if not list_items:
            # a list without items is serialized with None, not []
            new_list["items"] = None
        self.lists_dict[ensemble_id] = new_list

    def __parse_ldt(self):
        """Parse the project's ldt document and fill the instance dicts."""
        self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))

        if self.from_display:
            if isinstance(self.from_display, self._STRING_TYPES):
                # The id is passed as an XPath variable rather than pasted
                # into the expression, so quotes in it cannot alter the query.
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[@id=$display_id]",
                    display_id=self.from_display)
            else:
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[position()=1]")
            self.__parse_views(display_nodes)

        # Content objects already fetched, so that a content listed both as
        # media and in the annotations is only looked up once.
        contents = {}

        for media_node in self.ldt_doc.xpath("/iri/medias/media"):
            iri_id = media_node.attrib[u"id"]
            if self.from_display and iri_id not in self.display_contents_list:
                continue
            content = Content.objects.get(iri_id=iri_id)
            contents[iri_id] = content
            self.__parse_content(content)

        for content_node in self.ldt_doc.xpath("/iri/annotations/content"):
            content_id = content_node.attrib[u"id"]
            if self.from_display and content_id not in self.display_contents_list:
                continue
            content = contents.get(content_id)
            if content is None:
                content = Content.objects.get(iri_id=content_id)
                contents[content_id] = content
            for ensemble_node in content_node:
                if ensemble_node.tag != "ensemble":
                    continue
                ensemble_id = ensemble_node.get("id")
                if self.from_display and ensemble_id not in self.display_ensemble_list:
                    continue
                self.__parse_ensemble(ensemble_node, content)

        # reorder annotations and annotation types to follow the view
        if self.from_display and len(self.views_dict) > 0:
            new_annotation_types_dict = SortedDict()
            new_annotations_dict = SortedDict()
            for annotation_type in self.display_cuttings_list:
                if annotation_type in self.annotation_types_dict:
                    new_annotation_types_dict[annotation_type] = self.annotation_types_dict[annotation_type]
                    for annot in self.annotations_by_annotation_types[annotation_type]:
                        new_annotations_dict[annot['id']] = annot

            self.annotations_dict = new_annotations_dict
            self.annotation_types_dict = new_annotation_types_dict

        self.parsed = True

    def __parse_content(self, content):
        """Register the media entry of ``content``.

        When ``serialize_contents`` is set, the ensembles of the content's
        own iri file are parsed as well.
        """
        doc = lxml.etree.parse(content.iri_file_path())

        authors = content.authors.all()

        # first author is the creator, second one (if any) the contributor
        author = authors[0].handle if len(authors) > 0 else "IRI"
        contributor = authors[1].handle if len(authors) > 1 else author

        content_author = ""
        found = doc.xpath("/iri/head/meta[@name='author']/@content")
        if len(found) > 0:
            content_author = found[0]

        content_date = ""
        found = doc.xpath("/iri/head/meta[@name='date']/@content")
        if len(found) > 0:
            content_date = found[0]

        href = ""
        meta_item_value = ""
        if content.videopath:
            meta_item_value = content.videopath.rstrip('/') + "/"
            href = meta_item_value + content.src

        new_media = {
            "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms": "o=0",
            "id": content.iri_id,
            "href": href,
            "unit": "ms",
            "origin": "0",
            "meta": {
                "dc:creator": author,
                "dc:created": content.creation_date.isoformat(),
                "dc:contributor": contributor,
                "dc:modified": content.update_date.isoformat(),
                "dc:creator.contents": content_author,
                "dc:created.contents": content_date,
                "dc:title": content.title,
                "dc:description": content.description,
                "dc:duration": content.get_duration(),
                "item": {
                    "name": "streamer",
                    "value": meta_item_value,
                },
            },
        }

        self.medias_dict[content.iri_id] = new_media

        if self.serialize_contents:
            for ensemble_node in doc.xpath("/iri/body/ensembles/ensemble"):
                self.__parse_ensemble(ensemble_node, content)

    @staticmethod
    def _values_or_none(mapping):
        """Return the values of ``mapping`` as a list, or None when empty."""
        return list(mapping.values()) if len(mapping) > 0 else None

    def serialize_to_cinelab(self):
        """Return the project as a dict of cinelab sections.

        The result has the keys ``meta``, ``medias``, ``lists``, ``tags``,
        ``views``, ``annotation-types`` and ``annotations``. Every section
        but ``meta`` is a list, or None when it has no entry. The ldt
        document is parsed first if that has not been done yet.
        """
        res = {}

        if not self.parsed:
            self.__parse_ldt()

        # the main media is the first one that was registered
        project_main_media = ""
        if len(self.medias_dict) > 0:
            project_main_media = next(iter(self.medias_dict.values()))["id"]

        res['meta'] = {
            'id': self.project.ldt_id,
            'dc:created': self.project.creation_date.isoformat(),
            'dc:modified': self.project.modification_date.isoformat(),
            'dc:contributor': self.project.changed_by,
            'dc:creator': self.project.created_by,
            'dc:title': self.project.title,
            # the description is read from the parsed ldt document
            'dc:description': self.project.get_description(self.ldt_doc),
            'main_media': {"id-ref": project_main_media},
        }

        res['medias'] = self._values_or_none(self.medias_dict)
        res['lists'] = self._values_or_none(self.lists_dict)
        res['tags'] = self._values_or_none(self.tags)
        res['views'] = self._values_or_none(self.views_dict)

        res['annotation-types'] = self._values_or_none(self.annotation_types_dict)
        res['annotations'] = self._values_or_none(self.annotations_dict)

        return res

    def getAnnotations(self, first_cutting=True):
        """Return the annotations as a list of flat dicts.

        Each entry has the keys ``begin``, ``duration``, ``title``,
        ``desc``, ``tags`` (list of tag titles), ``id`` and ``uri``.
        ``uri`` is None unless the annotation's content has a media object
        with an external publication url.

        Arguments:
        first_cutting -- when true, stop at the first annotation that
            belongs to another cutting than the first one met.
        """
        if not self.parsed:
            self.__parse_ldt()

        annotations = []
        # Content objects already fetched, indexed by iri id
        contents = {}

        current_cutting = None
        for annot in self.annotations_dict.values():
            cutting_id = annot['meta']['id-ref']
            logging.debug("current cutting%r : annot %s", current_cutting, cutting_id)
            if first_cutting and current_cutting and current_cutting != cutting_id:
                break
            current_cutting = cutting_id

            content_id = annot['media']
            content = contents.get(content_id)
            if content is None:
                content = Content.objects.get(iri_id=content_id)
                contents[content_id] = content

            if annot['tags']:
                tags_list = [
                    self.tags_dict[tag_entry['id-ref']]['meta']['dc:title']
                    for tag_entry in annot['tags']
                ]
            else:
                tags_list = []

            begin = int(annot['begin'])
            duration = int(annot['end']) - begin

            # Reset for every annotation so that a content without a
            # publication url does not inherit the previous annotation's uri.
            uri = None
            if content.media_obj and content.media_obj.external_publication_url:
                uri = "%s#t=%d" % (content.media_obj.external_publication_url, begin)

            annotations.append({
                'begin': begin,
                'duration': duration,
                'title': annot['content']['title'],
                'desc': annot['content']['description'],
                'tags': tags_list,
                'id': annot['id'],
                'uri': uri,
            })

        return annotations
|
|
425 |
|
|
|
426 |
|