from django.conf import settings
from datetime import datetime
from django.utils.datastructures import SortedDict
from ldt.ldt_utils.models import Content
from ldt.ldt_utils.utils import reduce_text_node
from ldt.ldt_utils.models import User, Project
import logging
import lxml.etree
import uuid
# strptime formats tried, in this order, when parsing the "date" attribute
# of a decoupage node.
DATE_FORMATS = ["%d/%m/%Y", "%Y-%m-%d"]
"""
Serialize a project object to a cinelab-compatible dictionary
"""
class ProjectSerializer:
    """Build a cinelab-compatible dictionary from a project's ldt XML.

    The ldt document (``project.ldt``) is parsed lazily, on the first call
    to ``serialize_to_cinelab`` or ``get_annotations``. Parsed items are
    accumulated in the ``*_dict`` attributes, keyed by id.
    """

    def __init__(self, project, from_contents=True, from_display=True, first_cutting=None, viewable_contents=None):
        """Store the serialization options and create empty result containers.

        project -- object exposing the ``ldt`` XML string and the metadata
            attributes read by ``serialize_to_cinelab``.
        from_contents -- when true, the ensembles found in each content's
            own iri file are serialized as well.
        from_display -- when true, only the items referenced by the first
            ``display`` node are serialized; when it is a string, the
            display with that id is used instead.
        first_cutting -- id of the cutting to move to the first position
            of the view's annotation types.
        viewable_contents -- iri ids of the contents whose stream url may
            be exposed; any other content gets
            ``settings.FORBIDDEN_STREAM_URL``. Defaults to no content.
        """
        self.project = project
        self.parsed = False
        self.ldt_doc = None
        self.medias_dict = SortedDict()
        self.annotations_dict = SortedDict()
        self.annotations_by_annotation_types = {}
        # tags indexed by title (self.tags) and by id (self.tags_dict)
        self.tags = {}
        self.tags_dict = SortedDict()
        self.annotation_types_dict = SortedDict()
        self.views_dict = SortedDict()
        self.lists_dict = SortedDict()
        self.serialize_contents = from_contents
        self.from_display = from_display
        self.display_contents_list = []
        self.display_cuttings_list = []
        self.display_ensemble_list = []
        # A fresh list per instance: a mutable default would be shared by
        # every serializer created without this argument.
        self.viewable_contents = [] if viewable_contents is None else viewable_contents
        self.first_cutting = first_cutting

    def __parse_views(self, display_node_list):
        """Register one view per ``display`` node that carries an id.

        The content, cutting and ensemble ids referenced by the displays are
        also collected (without duplicates, in document order) in the
        ``display_*_list`` attributes.
        """
        for display_node in display_node_list:
            display_id = display_node.get(u"id", None)
            if not display_id:
                continue

            content_list = []
            cuttings_list = []
            new_display = {
                "id": display_id,
                "contents": content_list,
                "annotation_types": cuttings_list,
            }

            for content_node in display_node.xpath("content"):
                content_id = content_node.get("id")
                if content_id not in content_list:
                    content_list.append(content_id)
                if content_id not in self.display_contents_list:
                    self.display_contents_list.append(content_id)

                for cutting_node in content_node.xpath("decoupage"):
                    cutting_id = cutting_node.get("id")
                    if cutting_id not in cuttings_list:
                        cuttings_list.append(cutting_id)
                    if cutting_id not in self.display_cuttings_list:
                        self.display_cuttings_list.append(cutting_id)
                    ensemble_id = cutting_node.get("idens")
                    if ensemble_id not in self.display_ensemble_list:
                        self.display_ensemble_list.append(ensemble_id)

            # sets cutting to display in first position for the metadataplayer
            if self.first_cutting and self.first_cutting in cuttings_list:
                index = cuttings_list.index(self.first_cutting)
                cuttings_list[0], cuttings_list[index] = cuttings_list[index], cuttings_list[0]

            self.views_dict[display_id] = new_display

    def __parse_creation_date(self, date_str):
        """Return ``date_str`` as an ISO 8601 string.

        Each format of DATE_FORMATS is tried in order; an empty or
        unparsable value falls back to the current UTC time.
        """
        if date_str:
            for date_format in DATE_FORMATS:
                try:
                    return datetime.strptime(date_str, date_format).isoformat()
                except ValueError:
                    # not this format, try the next one
                    continue
        return datetime.utcnow().isoformat()

    def __element_tag_titles(self, element_node):
        """Return the tag titles of an ``element`` node.

        Sources are tried in order: the comma separated ``tags`` attribute,
        then the ``tag`` children, then the ``tags/tag`` children.
        """
        tags = element_node.get(u"tags", u"")
        if tags:
            tags_list = [title.strip() for title in tags.split(",")]
        else:
            tags_list = list(element_node.xpath("tag/text()", smart_strings=False))
        if not tags_list:
            tags_list = list(element_node.xpath("tags/tag/text()", smart_strings=False))
        return tags_list

    def __get_tag_id(self, tag_title, tag_date):
        """Return the id of the tag named ``tag_title``, creating it if needed."""
        known_tag = self.tags.get(tag_title)
        if known_tag is not None:
            # Reuse the id of the already registered tag; the previous code
            # reused whatever id was created last.
            return known_tag["id"]

        tag_id = unicode(uuid.uuid1())
        new_tag = {
            "id": tag_id,
            "meta": {
                "dc:creator": "IRI",
                "dc:created": tag_date,
                "dc:contributor": "IRI",
                "dc:modified": tag_date,
                "dc:title": tag_title
            }
        }
        self.tags[tag_title] = new_tag
        self.tags_dict[tag_id] = new_tag
        return tag_id

    def __parse_ensemble(self, ensemble_node, content):
        """Register the list, annotation types, annotations and tags of an ensemble.

        ensemble_node -- ``ensemble`` XML node; its ``decoupage`` children
            become annotation types and their ``elements/element``
            descendants become annotations.
        content -- the content the ensemble belongs to; its ``iri_id`` is
            used as the media reference.

        When ``from_display`` is set, cuttings that are not referenced by
        the display are skipped.
        """
        ensemble_id = ensemble_node.attrib[u"id"]
        ensemble_author = ensemble_node.attrib[u"author"]
        ensemble_title = ensemble_node.attrib[u"title"]
        ensemble_description = ensemble_node.attrib[u"abstract"]
        ensemble_created = datetime.utcnow().isoformat()
        ensemble_modified = ensemble_created

        list_items = []
        new_list = {
            "id": ensemble_id,
            "items": list_items,
            "meta": {
                "dc:creator": ensemble_author,
                "dc:created": ensemble_created,
                "dc:contributor": "undefined",
                "dc:modified": ensemble_modified,
                "dc:title": ensemble_title,
                "dc:description": ensemble_description,
                "id-ref": content.iri_id,
                "editable": "false"
            }
        }

        for decoupage_node in ensemble_node:
            if decoupage_node.tag != "decoupage":
                continue

            decoupage_id = decoupage_node.attrib[u"id"]
            if self.from_display and decoupage_id not in self.display_cuttings_list:
                continue

            decoupage_creator = decoupage_node.attrib[u"author"] or "IRI"
            decoupage_contributor = decoupage_creator
            decoupage_created = self.__parse_creation_date(decoupage_node.get(u"date"))
            decoupage_modified = decoupage_created

            decoupage_title = "".join(decoupage_node.xpath("title/text()", smart_strings=False))
            decoupage_description = "".join(decoupage_node.xpath("abstract/text()", smart_strings=False))

            list_items.append({"id-ref": decoupage_id})

            new_annotation_types = {
                "id": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:contributor": decoupage_contributor,
                "dc:modified": decoupage_modified,
                "dc:title": decoupage_title,
                "dc:description": decoupage_description
            }

            self.annotation_types_dict[decoupage_id] = new_annotation_types
            self.annotations_by_annotation_types[decoupage_id] = []

            for element_node in decoupage_node.xpath("elements/element"):
                element_id = element_node.attrib[u"id"]
                element_begin = element_node.attrib[u"begin"]
                element_duration = element_node.attrib[u"dur"]
                element_media = content.iri_id
                element_color = element_node.attrib.get(u"color", "")
                element_ldt_src = element_node.attrib.get(u"src", "")
                element_title = reduce_text_node(element_node, "title/text()")
                element_description = reduce_text_node(element_node, "abstract/text()")

                element_source = None
                element_source_node_list = element_node.xpath("meta/source")
                if len(element_source_node_list) > 0:
                    element_source_node = element_source_node_list[0]
                    element_source = {
                        "mimetype": element_source_node.get(u'mimetype'),
                        "url": element_source_node.get(u'url'),
                        "content": reduce_text_node(element_source_node)
                    }

                element_audio_src = ""
                element_audio_href = ""
                audio_nodes = element_node.xpath("audio")
                if len(audio_nodes) > 0:
                    element_audio_src = audio_nodes[0].get(u"source", u"")
                    element_audio_href = audio_nodes[0].text

                tag_date = datetime.utcnow().isoformat()
                element_tags = [
                    {"id-ref": self.__get_tag_id(tag_title, tag_date)}
                    for tag_title in self.__element_tag_titles(element_node)
                ]
                if not element_tags:
                    element_tags = None

                new_annotation = {
                    "begin": int(element_begin),
                    "end": int(element_begin) + int(element_duration),
                    "id": element_id,
                    "media": element_media,
                    "color": element_color,
                    "content": {
                        "mimetype": "application/x-ldt-structured",
                        "title": element_title,
                        "description": element_description,
                        "color": element_color,
                        "img": {
                            "src": element_ldt_src,
                        },
                        "audio": {
                            "src": element_audio_src,
                            "mimetype": "audio/mp3",
                            "href": element_audio_href
                        },
                        "polemics": [pol_elem.text for pol_elem in element_node.xpath("meta/polemics/polemic")],
                    },
                    "tags": element_tags,
                    "meta": {
                        "id-ref": decoupage_id,
                        "dc:creator": decoupage_creator,
                        "dc:contributor": decoupage_contributor,
                        "dc:created": decoupage_created,
                        "dc:modified": decoupage_modified,
                    }
                }

                if element_source:
                    new_annotation['meta']['dc:source'] = element_source

                self.annotations_dict[element_id] = new_annotation
                self.annotations_by_annotation_types[decoupage_id].append(new_annotation)

        if not list_items:
            new_list["items"] = None
        self.lists_dict[ensemble_id] = new_list

    def __parse_ldt(self):
        """Parse the project's ldt document and fill the result containers.

        Views are parsed first (when ``from_display`` is set) so that
        medias, ensembles and cuttings can be filtered against them.
        """
        self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))

        if self.from_display:
            if isinstance(self.from_display, basestring):
                # The id is passed as an XPath variable rather than being
                # formatted into the expression, so quotes cannot break it.
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[@id=$display_id]",
                    display_id=self.from_display)
            else:
                display_nodes = self.ldt_doc.xpath("/iri/displays/display[position()=1]")
            self.__parse_views(display_nodes)

        for media_node in self.ldt_doc.xpath("/iri/medias/media"):
            iri_id = media_node.attrib[u"id"]
            if self.from_display and iri_id not in self.display_contents_list:
                continue
            content = Content.objects.get(iri_id=iri_id) #@UndefinedVariable
            self.__parse_content(content)

        for content_node in self.ldt_doc.xpath("/iri/annotations/content"):
            content_id = content_node.attrib[u"id"]
            if self.from_display and content_id not in self.display_contents_list:
                continue
            content = Content.objects.get(iri_id=content_id) #@UndefinedVariable
            for ensemble_node in content_node:
                if ensemble_node.tag != "ensemble":
                    continue
                ensemble_id = ensemble_node.get("id")
                if self.from_display and ensemble_id not in self.display_ensemble_list:
                    continue
                self.__parse_ensemble(ensemble_node, content)

        # reorder annotations and annotation types from view
        if self.from_display and len(self.views_dict) > 0:
            new_annotation_types_dict = SortedDict()
            new_annotations_dict = SortedDict()
            for annotation_type in self.display_cuttings_list:
                if annotation_type in self.annotation_types_dict:
                    new_annotation_types_dict[annotation_type] = self.annotation_types_dict[annotation_type]
                    for annot in self.annotations_by_annotation_types[annotation_type]:
                        new_annotations_dict[annot['id']] = annot
            self.annotations_dict = new_annotations_dict
            self.annotation_types_dict = new_annotation_types_dict

        self.parsed = True

    def __parse_content(self, content):
        """Register ``content`` as a media and, optionally, its own ensembles.

        The stream url is replaced by ``settings.FORBIDDEN_STREAM_URL``
        when the content's iri id is not in ``viewable_contents``.
        """
        doc = lxml.etree.parse(content.iri_file_path())

        authors = content.authors.all()
        if len(authors) > 0:
            author = authors[0].handle
        else:
            author = "IRI"
        if len(authors) > 1:
            contributor = authors[1].handle
        else:
            contributor = author

        content_author = ""
        author_values = doc.xpath("/iri/head/meta[@name='author']/@content")
        if len(author_values) > 0:
            content_author = author_values[0]

        content_date = ""
        date_values = doc.xpath("/iri/head/meta[@name='date']/@content")
        if len(date_values) > 0:
            content_date = date_values[0]

        meta_item_value = ""
        if content.iri_id not in self.viewable_contents:
            href = settings.FORBIDDEN_STREAM_URL
        elif content.videopath:
            href = content.videopath.rstrip('/') + "/" + content.src
            meta_item_value = content.videopath.rstrip('/') + "/"
        else:
            href = content.src

        new_media = {
            "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms": "o=0",
            "id": content.iri_id,
            "href": href,
            "unit": "ms",
            "origin": "0",
            "meta": {
                "dc:creator": author,
                "dc:created": content.creation_date.isoformat(),
                "dc:contributor": contributor,
                "dc:modified": content.update_date.isoformat(),
                "dc:creator.contents": content_author,
                "dc:created.contents": content_date,
                "dc:title": content.title,
                "dc:description": content.description,
                "dc:duration": content.get_duration(),
                "item": {
                    "name": "streamer",
                    "value": meta_item_value,
                },
            }
        }

        self.medias_dict[content.iri_id] = new_media

        if self.serialize_contents:
            for ensemble_node in doc.xpath("/iri/body/ensembles/ensemble"):
                self.__parse_ensemble(ensemble_node, content)

    def serialize_to_cinelab(self):
        """Return the project as a cinelab-style dictionary.

        The ldt document is parsed on first use. Each top-level collection
        ('medias', 'lists', 'tags', 'views', 'annotation-types',
        'annotations') is ``None`` when it is empty.
        """
        if not self.parsed:
            self.__parse_ldt()

        def values_or_none(mapping):
            return mapping.values() if len(mapping) > 0 else None

        project_main_media = ""
        if len(self.medias_dict) > 0:
            project_main_media = self.medias_dict.value_for_index(0)["id"]

        res = {}
        res['meta'] = {
            'id': self.project.ldt_id,
            'dc:created': self.project.creation_date.isoformat(),
            'dc:modified': self.project.modification_date.isoformat(),
            'dc:contributor': self.project.changed_by,
            'dc:creator': self.project.created_by,
            'dc:title': self.project.title,
            'dc:description': self.project.get_description(self.ldt_doc), # get from doc, parse ldt
            'main_media': {"id-ref": project_main_media}
        }
        res['medias'] = values_or_none(self.medias_dict)
        res['lists'] = values_or_none(self.lists_dict)
        res['tags'] = values_or_none(self.tags)
        res['views'] = values_or_none(self.views_dict)
        res['annotation-types'] = values_or_none(self.annotation_types_dict)
        res['annotations'] = values_or_none(self.annotations_dict)
        return res

    def get_annotations(self, first_cutting=True):
        """Return the annotations as a list of flat dictionaries.

        first_cutting -- when true, iteration stops at the first annotation
            that belongs to a different cutting than the previous ones.

        Each entry has the keys 'begin', 'duration', 'title', 'desc',
        'tags' (tag titles), 'id' and 'uri'. 'uri' is ``None`` unless the
        annotation's content has a media object with an external
        publication url.
        """
        if not self.parsed:
            self.__parse_ldt()

        annotations = []
        current_cutting = None
        # one database lookup per content instead of one per annotation
        contents_by_id = {}

        for annot in self.annotations_dict.values():
            cutting_id = annot['meta']['id-ref']
            logging.debug("current cutting%r : annot %s", current_cutting, cutting_id) #@UndefinedVariable
            if first_cutting and current_cutting and current_cutting != cutting_id:
                break
            current_cutting = cutting_id

            content_id = annot['media']
            if content_id not in contents_by_id:
                contents_by_id[content_id] = Content.objects.get(iri_id=content_id) #@UndefinedVariable
            content = contents_by_id[content_id]

            tags_list = [
                self.tags_dict[tag_entry['id-ref']]['meta']['dc:title']
                for tag_entry in (annot['tags'] or [])
            ]

            begin = int(annot['begin'])
            duration = int(annot['end']) - begin

            # Reset for every annotation so that a content without a
            # publication url does not inherit the previous annotation's uri.
            uri = None
            if content.media_obj and content.media_obj.external_publication_url:
                uri = "%s#t=%d" % (content.media_obj.external_publication_url, begin)

            annotations.append({
                'begin': begin,
                'duration': duration,
                'title': annot['content']['title'],
                'desc': annot['content']['description'],
                'tags': tags_list,
                'id': annot['id'],
                'uri': uri
            })

        return annotations
"""
Quick and dirty converter from cinelab JSON to ldt format.
Does not support imports, multiple medias, or media creation
"""
class JsonCinelab2Ldt:
    """Convert a cinelab JSON structure into a new ldt project."""

    def create_json(self, json):
        """Create a project from a cinelab structure and return its ldt XML.

        json -- decoded cinelab structure (the parameter name shadows the
            ``json`` module name inside this method). The keys read are
            'medias', 'meta' ('creator', 'contributor'),
            'annotation_types' and 'annotations'.

        All the cuttings are attached to the first media only. The project
        is created through ``Project.create_project`` and saved before its
        serialized ldt document is returned.
        """
        medias = json['medias']
        contentList = []
        for media in medias:
            c = Content.objects.get(iri_id=media['id'])
            if c is not None:
                contentList.append(c)

        meta = json['meta']
        creator = meta['creator']
        contributor = meta['contributor']

        user = User.objects.get(username=creator)
        project = Project.create_project(user, creator + '_' + contributor, contentList)
        project.changed_by = contributor

        ldtdoc = lxml.etree.fromstring(project.ldt.encode("utf-8"))
        annotations_nodes = ldtdoc.xpath('/iri/annotations')

        annotation_types = json['annotation_types']
        # cutting name -> {'desc': description, 'xml_node': <elements> node}
        cuttings = {}
        if len(annotation_types) > 0:
            content_node = lxml.etree.SubElement(annotations_nodes[0], 'content')
            content_node.set('id', medias[0]['id'])

            ens = lxml.etree.SubElement(content_node, 'ensemble')
            ens.set('title', 'Decoupages personnels')
            ens.set('idProject', project.ldt_id)
            ens.set('abstract', '')
            ens.set('id', 'g_' + str(uuid.uuid1()))

            for annotation_type in annotation_types:
                cutting_infos = {'desc': annotation_type['meta']['description']}

                dec = lxml.etree.SubElement(ens, 'decoupage')
                dec.set('author', contributor)
                dec.set('id', 'c_' + str(uuid.uuid1()))
                elements_list = lxml.etree.SubElement(dec, 'elements')

                title = lxml.etree.SubElement(dec, 'title')
                title.text = annotation_type['id']

                abstract = lxml.etree.SubElement(dec, 'abstract')
                abstract.text = annotation_type['meta']['description']

                cutting_infos['xml_node'] = elements_list
                cuttings[annotation_type['id']] = cutting_infos

        for annotation in json['annotations']:
            # raises KeyError when the annotation's type was not declared
            # in 'annotation_types'
            elements_node = cuttings[annotation['type']]['xml_node']

            element_node = lxml.etree.SubElement(elements_node, 'element')
            element_node.set('begin', str(annotation['begin']))
            element_node.set('dur', str(annotation['end'] - annotation['begin']))
            element_node.set('id', 's_' + str(uuid.uuid1()))

            # the title node is created empty
            lxml.etree.SubElement(element_node, 'title')

            audio = lxml.etree.SubElement(element_node, 'audio')
            audio.set('source', 'undefined')

            abstract = lxml.etree.SubElement(element_node, 'abstract')
            abstract.text = annotation['content']['data']

            tags = lxml.etree.SubElement(element_node, 'tags')
            for tag in annotation['tags']:
                tag_xml = lxml.etree.SubElement(tags, 'tag')
                tag_xml.text = tag

        project.ldt = lxml.etree.tostring(ldtdoc, pretty_print=True)
        project.save()

        return project.ldt