|
1 from datetime import datetime |
|
2 from django.utils.datastructures import SortedDict |
|
3 from ldt.ldt_utils.models import Content, Project |
|
4 import logging |
|
5 import lxml.etree |
|
6 import uuid |
|
7 |
|
# strptime patterns tried, in order, when reading a decoupage "date" attribute.
DATE_FORMATS = [
    "%d/%m/%Y",  # day/month/year, e.g. 31/12/2010
    "%Y-%m-%d",  # ISO-like year-month-day, e.g. 2010-12-31
]
|
9 |
|
class ProjectSerializer:
    """Serialize a project object to a cinelab compatible array.

    The project's ``ldt`` XML is parsed lazily, the first time
    ``serialize_to_cinelab`` or ``getAnnotations`` is called. Parsing fills
    the ``*_dict`` attributes (medias, lists, annotation types, annotations,
    tags, views), which the two public methods then read.
    """

    def __init__(self, project, from_contents=True, from_display=True):
        """Prepare an (unparsed) serializer for ``project``.

        :param project: object exposing at least ``ldt`` (the XML text),
            ``ldt_id``, ``title``, ``creation_date``, ``modification_date``,
            ``created_by``, ``changed_by`` and ``get_description(doc)``.
        :param from_contents: when true, the ensembles stored in each
            content's own iri file are serialized too.
        :param from_display: when true, only the contents, cuttings and
            ensembles referenced by a display are serialized. A string value
            selects the display with that id; any other true value selects
            the first display of the document.
        """
        self.project = project
        self.parsed = False
        self.ldt_doc = None

        # Output collections; SortedDict keeps them in insertion order.
        self.medias_dict = SortedDict()
        self.annotations_dict = SortedDict()
        self.annotation_types_dict = SortedDict()
        self.views_dict = SortedDict()
        self.lists_dict = SortedDict()

        # The same tag objects are indexed twice: by title and by id.
        self.tags = {}
        self.tags_dict = SortedDict()

        # annotation type id -> list of its annotations, in document order.
        self.annotations_by_annotation_types = {}

        self.serialize_contents = from_contents
        self.from_display = from_display

        # Ids collected from the selected display(s); used as filters.
        self.display_contents_list = []
        self.display_cuttings_list = []
        self.display_ensemble_list = []

    def __parse_views(self, display_node_list):
        """Register every display node that has an id as a view.

        Also records, in first-seen order and without duplicates, the
        content, cutting and ensemble ids the displays reference.
        """
        for display_node in display_node_list:
            display_id = display_node.get(u"id", None)
            if not display_id:
                continue

            content_list = []
            cuttings_list = []

            for content_node in display_node.xpath("content"):
                content_id = content_node.get("id")
                if content_id not in content_list:
                    content_list.append(content_id)
                if content_id not in self.display_contents_list:
                    self.display_contents_list.append(content_id)

                for cutting_node in content_node.xpath("decoupage"):
                    cutting_id = cutting_node.get("id")
                    if cutting_id not in cuttings_list:
                        cuttings_list.append(cutting_id)
                    if cutting_id not in self.display_cuttings_list:
                        self.display_cuttings_list.append(cutting_id)
                    ensemble_id = cutting_node.get("idens")
                    if ensemble_id not in self.display_ensemble_list:
                        self.display_ensemble_list.append(ensemble_id)

            self.views_dict[display_id] = {
                "id": display_id,
                "contents": content_list,
                "annotation_types": cuttings_list,
            }

    def __joined_text(self, node, text_xpath):
        """Return the concatenation of the text nodes matched by ``text_xpath``."""
        return "".join(node.xpath(text_xpath, smart_strings=False))

    def __parse_date(self, date_str):
        """Return ``date_str`` as an ISO string, or the current UTC time.

        Each pattern of ``DATE_FORMATS`` is tried in order; the current UTC
        time is used when ``date_str`` is empty or matches none of them.
        """
        if date_str:
            for date_format in DATE_FORMATS:
                try:
                    return datetime.strptime(date_str, date_format).isoformat()
                except ValueError:
                    # Not this format; try the next one.
                    continue
        return datetime.utcnow().isoformat()

    def __collect_tag_titles(self, element_node):
        """Return the tag titles attached to an element node.

        The comma separated ``tags`` attribute wins; otherwise ``tag``
        children are used, and ``tags/tag`` children as a last resort.
        """
        tags_attr = element_node.get(u"tags", u"")
        if tags_attr:
            return [title.strip() for title in tags_attr.split(",")]

        titles = list(element_node.xpath("tag/text()", smart_strings=False))
        if not titles:
            titles = list(
                element_node.xpath("tags/tag/text()", smart_strings=False))
        return titles

    def __tag_ref(self, tag_title, tag_date):
        """Return the ``{"id-ref": ...}`` entry for ``tag_title``.

        The tag is created and registered on first use. A title that is
        already known resolves to the id of its existing tag, so every
        annotation carrying the same title references the same tag.
        """
        tag = self.tags.get(tag_title)
        if tag is None:
            # Formatting into a unicode literal gives the textual uuid.
            tag_id = u"%s" % uuid.uuid1()
            tag = {
                "id": tag_id,
                "meta": {
                    "dc:creator": "IRI",
                    "dc:created": tag_date,
                    "dc:contributor": "IRI",
                    "dc:modified": tag_date,
                    "dc:title": tag_title,
                },
            }
            self.tags[tag_title] = tag
            self.tags_dict[tag_id] = tag
        return {"id-ref": tag["id"]}

    def __parse_element(self, element_node, content, decoupage_id,
                        decoupage_creator, decoupage_created):
        """Build the annotation dict for one ``element`` node.

        Creator and dates are inherited from the enclosing decoupage.
        Raises ``KeyError`` when the node lacks ``id``, ``begin``, ``dur``
        or ``color``.
        """
        element_id = element_node.attrib[u"id"]
        element_begin = element_node.attrib[u"begin"]
        element_duration = element_node.attrib[u"dur"]
        element_color = element_node.attrib[u"color"]

        element_audio_src = ""
        element_audio_href = ""
        audio_nodes = element_node.xpath("audio")
        if len(audio_nodes) > 0:
            element_audio_src = audio_nodes[0].get(u"source", u"")
            element_audio_href = audio_nodes[0].text

        tag_date = datetime.utcnow().isoformat()
        element_tags = [
            self.__tag_ref(tag_title, tag_date)
            for tag_title in self.__collect_tag_titles(element_node)
        ]

        return {
            # "begin" is kept as read from the attribute; "end" is computed.
            "begin": element_begin,
            "end": int(element_begin) + int(element_duration),
            "id": element_id,
            "media": content.iri_id,
            "content": {
                "mimetype": "application/x-ldt-structured",
                "title": self.__joined_text(element_node, "title/text()"),
                "description": self.__joined_text(
                    element_node, "abstract/text()"),
                "color": element_color,
                "audio": {
                    "src": element_audio_src,
                    "mimetype": "audio/mp3",
                    "href": element_audio_href,
                },
            },
            # An annotation without tags carries None, not an empty list.
            "tags": element_tags or None,
            "meta": {
                "id-ref": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:contributor": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:modified": decoupage_created,
            },
        }

    def __parse_ensemble(self, ensemble_node, content):
        """Register an ensemble as a list, with its cuttings and annotations.

        Each ``decoupage`` child becomes an annotation type and each of its
        ``elements/element`` nodes an annotation. When ``from_display`` is
        set, decoupages not referenced by the display are skipped.

        :param ensemble_node: the ``ensemble`` XML node.
        :param content: object whose ``iri_id`` identifies the media.
        :raises KeyError: when a mandatory XML attribute is missing.
        """
        ensemble_id = ensemble_node.attrib[u"id"]
        ensemble_created = datetime.utcnow().isoformat()

        list_items = []
        new_list = {
            "id": ensemble_id,
            "items": list_items,
            "meta": {
                "dc:creator": ensemble_node.attrib[u"author"],
                "dc:created": ensemble_created,
                "dc:contributor": "undefined",
                "dc:modified": ensemble_created,
                "dc:title": ensemble_node.attrib[u"title"],
                "dc:description": ensemble_node.attrib[u"abstract"],
                "id-ref": content.iri_id,
                "editable": "false",
            },
        }

        for decoupage_node in ensemble_node:
            if decoupage_node.tag != "decoupage":
                continue

            decoupage_id = decoupage_node.attrib[u"id"]
            if (self.from_display
                    and decoupage_id not in self.display_cuttings_list):
                continue

            # An empty author attribute falls back to "IRI".
            decoupage_creator = decoupage_node.attrib[u"author"] or "IRI"
            decoupage_created = self.__parse_date(decoupage_node.get(u"date"))

            list_items.append({"id-ref": decoupage_id})

            self.annotation_types_dict[decoupage_id] = {
                "id": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:contributor": decoupage_creator,
                "dc:modified": decoupage_created,
                "dc:title": self.__joined_text(
                    decoupage_node, "title/text()"),
                "dc:description": self.__joined_text(
                    decoupage_node, "abstract/text()"),
            }

            type_annotations = []
            self.annotations_by_annotation_types[decoupage_id] = type_annotations

            for element_node in decoupage_node.xpath("elements/element"):
                new_annotation = self.__parse_element(
                    element_node, content, decoupage_id,
                    decoupage_creator, decoupage_created)
                self.annotations_dict[new_annotation["id"]] = new_annotation
                type_annotations.append(new_annotation)

        # A list without items carries None, not an empty list.
        if not list_items:
            new_list["items"] = None
        self.lists_dict[ensemble_id] = new_list

    def __parse_ldt(self):
        """Parse the project's ldt document and fill every collection.

        Order: views (when ``from_display`` is set), medias, then the
        annotations of the project. Finally annotation types and annotations
        are reordered to follow the display's cutting order.
        """
        self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))

        if self.from_display:
            if isinstance(self.from_display, basestring):
                # The id is passed as an XPath variable so that quotes in it
                # can neither break nor alter the expression.
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[@id=$display_id]",
                    display_id=self.from_display)
            else:
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[position()=1]")
            self.__parse_views(display_nodes)

        for media_node in self.ldt_doc.xpath("/iri/medias/media"):
            iri_id = media_node.attrib[u"id"]
            if self.from_display and iri_id not in self.display_contents_list:
                continue
            self.__parse_content(Content.objects.get(iri_id=iri_id))

        for content_node in self.ldt_doc.xpath("/iri/annotations/content"):
            content_id = content_node.attrib[u"id"]
            if (self.from_display
                    and content_id not in self.display_contents_list):
                continue
            content = Content.objects.get(iri_id=content_id)
            for ensemble_node in content_node:
                if ensemble_node.tag != "ensemble":
                    continue
                ensemble_id = ensemble_node.get("id")
                if (self.from_display
                        and ensemble_id not in self.display_ensemble_list):
                    continue
                self.__parse_ensemble(ensemble_node, content)

        # Reorder annotations and annotation types from the view.
        if self.from_display and len(self.views_dict) > 0:
            ordered_types = SortedDict()
            ordered_annotations = SortedDict()
            for annotation_type in self.display_cuttings_list:
                if annotation_type not in self.annotation_types_dict:
                    continue
                ordered_types[annotation_type] = (
                    self.annotation_types_dict[annotation_type])
                for annot in self.annotations_by_annotation_types.get(
                        annotation_type, []):
                    ordered_annotations[annot['id']] = annot

            self.annotations_dict = ordered_annotations
            self.annotation_types_dict = ordered_types

        self.parsed = True

    def __parse_content(self, content):
        """Register ``content`` as a media.

        Reads the content's iri file for the author/date metadata and, when
        ``serialize_contents`` is set, parses the ensembles it contains.
        """
        doc = lxml.etree.parse(content.iri_file_path())

        # First author is the creator, second one (if any) the contributor.
        authors = content.authors.all()
        author = authors[0].handle if len(authors) > 0 else "IRI"
        contributor = authors[1].handle if len(authors) > 1 else author

        content_author = ""
        found = doc.xpath("/iri/head/meta[@name='author']/@content")
        if len(found) > 0:
            content_author = found[0]

        content_date = ""
        found = doc.xpath("/iri/head/meta[@name='date']/@content")
        if len(found) > 0:
            content_date = found[0]

        href = ""
        meta_item_value = ""
        if content.videopath:
            meta_item_value = content.videopath.rstrip('/') + "/"
            href = meta_item_value + content.src

        self.medias_dict[content.iri_id] = {
            "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms": "o=0",
            "id": content.iri_id,
            "href": href,
            "unit": "ms",
            "origin": "0",
            "meta": {
                "dc:creator": author,
                "dc:created": content.creation_date.isoformat(),
                "dc:contributor": contributor,
                "dc:modified": content.update_date.isoformat(),
                "dc:creator.contents": content_author,
                "dc:created.contents": content_date,
                "dc:title": content.title,
                "dc:description": content.description,
                "dc:duration": content.get_duration(),
                "item": {
                    "name": "streamer",
                    "value": meta_item_value,
                },
            },
        }

        if self.serialize_contents:
            for ensemble_node in doc.xpath("/iri/body/ensembles/ensemble"):
                self.__parse_ensemble(ensemble_node, content)

    def __values_or_none(self, collection):
        """Return ``collection.values()``, or None when it is empty."""
        return collection.values() if len(collection) > 0 else None

    def serialize_to_cinelab(self):
        """Return the project as a dict following the cinelab layout.

        Keys: ``meta``, ``medias``, ``lists``, ``tags``, ``views``,
        ``annotation-types`` and ``annotations``. Every collection that
        turns out empty is emitted as None. Parses the project on first use.
        """
        if not self.parsed:
            self.__parse_ldt()

        # The first registered media is the project's main media.
        project_main_media = ""
        if len(self.medias_dict) > 0:
            project_main_media = self.medias_dict.value_for_index(0)["id"]

        res = {}
        res['meta'] = {
            'id': self.project.ldt_id,
            'dc:created': self.project.creation_date.isoformat(),
            'dc:modified': self.project.modification_date.isoformat(),
            'dc:contributor': self.project.changed_by,
            'dc:creator': self.project.created_by,
            'dc:title': self.project.title,
            # The description is read from the parsed ldt document.
            'dc:description': self.project.get_description(self.ldt_doc),
            'main_media': {"id-ref": project_main_media},
        }

        res['medias'] = self.__values_or_none(self.medias_dict)
        res['lists'] = self.__values_or_none(self.lists_dict)
        # tags_dict holds the same tag objects as self.tags, but in creation
        # order, which makes the output deterministic.
        res['tags'] = self.__values_or_none(self.tags_dict)
        res['views'] = self.__values_or_none(self.views_dict)
        res['annotation-types'] = self.__values_or_none(
            self.annotation_types_dict)
        res['annotations'] = self.__values_or_none(self.annotations_dict)

        return res

    def getAnnotations(self, first_cutting=True):
        """Return the annotations as a flat list of simple dicts.

        Each entry has ``begin``, ``duration``, ``title``, ``desc``,
        ``tags`` (list of titles), ``id`` and ``uri``. ``uri`` is None when
        the annotation's content has no external publication url.

        :param first_cutting: when true, stop at the first annotation that
            belongs to a different cutting than the first one.
        """
        if not self.parsed:
            self.__parse_ldt()

        annotations = []
        contents_by_id = {}  # avoids one database lookup per annotation
        current_cutting = None

        for annot in self.annotations_dict.values():
            cutting_id = annot['meta']['id-ref']
            logging.debug("current cutting%r : annot %s",
                          current_cutting, cutting_id)
            if (first_cutting and current_cutting
                    and current_cutting != cutting_id):
                break
            current_cutting = cutting_id

            content_id = annot['media']
            if content_id not in contents_by_id:
                contents_by_id[content_id] = Content.objects.get(
                    iri_id=content_id)
            content = contents_by_id[content_id]

            tags_list = [
                self.tags_dict[tag_entry['id-ref']]['meta']['dc:title']
                for tag_entry in (annot['tags'] or [])
            ]

            begin = int(annot['begin'])
            duration = int(annot['end']) - begin

            # Reset for every annotation so that a content without a
            # publication url never inherits the uri of a previous one.
            uri = None
            if content.media_obj and content.media_obj.external_publication_url:
                uri = "%s#t=%d" % (
                    content.media_obj.external_publication_url, begin)

            annotations.append({
                'begin': begin,
                'duration': duration,
                'title': annot['content']['title'],
                'desc': annot['content']['description'],
                'tags': tags_list,
                'id': annot['id'],
                'uri': uri,
            })

        return annotations
|
425 |
|
426 |