# Reconstructed from a changeset diff (-r 5bb249eefdd1 -r 22ab430e9b64,
# Tue Nov 16 14:15:07 2010 +0100) that adds
# web/ldt/ldt_utils/projectserializer.py.
from datetime import datetime
from django.utils.datastructures import SortedDict
from ldt.ldt_utils.models import Content, Project
import logging
import lxml.etree
import uuid

# Date layouts tried, in order, on the "date" attribute of a <decoupage>.
DATE_FORMATS = ["%d/%m/%Y", "%Y-%m-%d"]


def _xpath_text(node, path):
    """Return the concatenation of every text result of ``path`` on ``node``."""
    return "".join(node.xpath(path, smart_strings=False))


def _parse_decoupage_date(date_str):
    """
    Return ``date_str`` as an ISO 8601 string.

    Each layout of DATE_FORMATS is tried in turn. When ``date_str`` is empty
    or matches none of them, the current UTC time is returned instead.
    """
    if date_str:
        for date_format in DATE_FORMATS:
            try:
                return datetime.strptime(date_str, date_format).isoformat()
            except ValueError:
                # Layout mismatch: try the next one.
                continue
    return datetime.utcnow().isoformat()


def _element_tag_titles(element_node):
    """
    Return the tag titles attached to an <element> node.

    Sources are tried in this order, the first non-empty one wins:
    the comma separated "tags" attribute, the <tag> children, then the
    <tags>/<tag> grandchildren. Blank entries of the attribute are dropped.
    """
    tags = element_node.get(u"tags", u"")
    titles = []
    if tags:
        titles = [title.strip() for title in tags.split(",") if title.strip()]
    if not titles:
        titles = list(element_node.xpath("tag/text()", smart_strings=False))
    if not titles:
        titles = list(element_node.xpath("tags/tag/text()", smart_strings=False))
    return titles


def _values_or_none(mapping):
    """Return ``mapping.values()``, or None when the mapping is empty."""
    return mapping.values() if mapping else None


class ProjectSerializer:
    """
    Serialize a project object to a cinelab compatible array.

    The project's ``ldt`` XML is parsed lazily, on the first call to
    serialize_to_cinelab() or getAnnotations().

    project       -- object providing the ``ldt`` XML string and the metadata
                     read by serialize_to_cinelab().
    from_contents -- when true, the ensembles found in the iri file of each
                     content are serialized as well.
    from_display  -- false: no filtering; true: keep only what the first
                     <display> references; a string: keep only what the
                     <display> having that id references.
    """

    def __init__(self, project, from_contents=True, from_display=True):
        self.project = project
        self.parsed = False
        self.ldt_doc = None
        self.medias_dict = SortedDict()
        self.annotations_dict = SortedDict()
        self.annotations_by_annotation_types = {}
        # Tags indexed by title (self.tags) and by id (self.tags_dict);
        # both hold the same tag objects.
        self.tags = {}
        self.tags_dict = SortedDict()
        self.annotation_types_dict = SortedDict()
        self.views_dict = SortedDict()
        self.lists_dict = SortedDict()
        self.serialize_contents = from_contents
        self.from_display = from_display
        self.display_contents_list = []
        self.display_cuttings_list = []
        self.display_ensemble_list = []

    def __parse_views(self, display_node_list):
        """
        Register the views described by the given <display> nodes.

        Every node carrying an "id" adds one entry to views_dict. The ids of
        the contents, cuttings (<decoupage>) and ensembles ("idens") it
        references are accumulated, without duplicates, in the display_*
        lists used later to filter and order the output.
        """
        for display_node in display_node_list:
            display_id = display_node.get(u"id", None)
            if not display_id:
                continue
            content_list = []
            cuttings_list = []
            new_display = {
                "id": display_id,
                "contents": content_list,
                "annotation_types": cuttings_list,
            }

            for content_node in display_node.xpath("content"):
                content_id = content_node.get("id")
                if content_id not in content_list:
                    content_list.append(content_id)
                if content_id not in self.display_contents_list:
                    self.display_contents_list.append(content_id)
                for cutting_node in content_node.xpath("decoupage"):
                    cutting_id = cutting_node.get("id")
                    if cutting_id not in cuttings_list:
                        cuttings_list.append(cutting_id)
                    if cutting_id not in self.display_cuttings_list:
                        self.display_cuttings_list.append(cutting_id)
                    ensemble_id = cutting_node.get("idens")
                    if ensemble_id not in self.display_ensemble_list:
                        self.display_ensemble_list.append(ensemble_id)
            self.views_dict[display_id] = new_display

    def __get_tag_id(self, tag_title, tag_date):
        """
        Return the id of the tag titled ``tag_title``, creating it if needed.

        A new tag gets a uuid1 based id and is registered in both self.tags
        and self.tags_dict. An already known title returns the id of the
        existing tag, so that every annotation references the right tag.
        """
        tag = self.tags.get(tag_title)
        if tag is None:
            tag_id = unicode(uuid.uuid1())
            tag = {
                "id": tag_id,
                "meta": {
                    "dc:creator": "IRI",
                    "dc:created": tag_date,
                    "dc:contributor": "IRI",
                    "dc:modified": tag_date,
                    "dc:title": tag_title,
                },
            }
            self.tags[tag_title] = tag
            self.tags_dict[tag_id] = tag
        return tag["id"]

    def __parse_ensemble(self, ensemble_node, content):
        """
        Convert an <ensemble> node into a list, annotation types and annotations.

        ensemble_node -- lxml element; its "id", "author", "title" and
                         "abstract" attributes are required (KeyError
                         otherwise).
        content       -- the Content whose iri_id is used as media reference.

        Each <decoupage> child becomes an annotation type and each of its
        elements/element descendants an annotation. When from_display is
        set, cuttings absent from display_cuttings_list are skipped.
        """
        ensemble_id = ensemble_node.attrib[u"id"]
        ensemble_author = ensemble_node.attrib[u"author"]
        ensemble_title = ensemble_node.attrib[u"title"]
        ensemble_description = ensemble_node.attrib[u"abstract"]
        ensemble_created = datetime.utcnow().isoformat()
        ensemble_modified = ensemble_created

        list_items = []
        new_list = {
            "id": ensemble_id,
            "items": list_items,
            "meta": {
                "dc:creator": ensemble_author,
                "dc:created": ensemble_created,
                "dc:contributor": "undefined",
                "dc:modified": ensemble_modified,
                "dc:title": ensemble_title,
                "dc:description": ensemble_description,
                "id-ref": content.iri_id,
                "editable": "false",
            },
        }

        for decoupage_node in ensemble_node:
            if decoupage_node.tag != "decoupage":
                continue

            decoupage_id = decoupage_node.attrib[u"id"]
            if self.from_display and decoupage_id not in self.display_cuttings_list:
                continue
            # A missing or empty author falls back to "IRI".
            decoupage_creator = decoupage_node.get(u"author") or "IRI"
            decoupage_contributor = decoupage_creator
            decoupage_created = _parse_decoupage_date(decoupage_node.get(u"date"))
            decoupage_modified = decoupage_created
            decoupage_title = _xpath_text(decoupage_node, "title/text()")
            decoupage_description = _xpath_text(decoupage_node, "abstract/text()")

            list_items.append({"id-ref": decoupage_id})

            self.annotation_types_dict[decoupage_id] = {
                "id": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:contributor": decoupage_contributor,
                "dc:modified": decoupage_modified,
                "dc:title": decoupage_title,
                "dc:description": decoupage_description,
            }
            self.annotations_by_annotation_types[decoupage_id] = []

            for element_node in decoupage_node.xpath("elements/element"):
                element_id = element_node.attrib[u"id"]
                element_begin = element_node.attrib[u"begin"]
                element_duration = element_node.attrib[u"dur"]
                element_media = content.iri_id
                element_color = element_node.attrib[u"color"]
                element_title = _xpath_text(element_node, "title/text()")
                element_description = _xpath_text(element_node, "abstract/text()")

                element_audio_src = ""
                element_audio_href = ""
                audio_nodes = element_node.xpath("audio")
                if audio_nodes:
                    element_audio_src = audio_nodes[0].get(u"source", u"")
                    element_audio_href = audio_nodes[0].text

                tag_date = datetime.utcnow().isoformat()
                # None, rather than an empty list, when there is no tag.
                element_tags = [
                    {"id-ref": self.__get_tag_id(tag_title, tag_date)}
                    for tag_title in _element_tag_titles(element_node)
                ] or None

                new_annotation = {
                    "begin": element_begin,
                    "end": int(element_begin) + int(element_duration),
                    "id": element_id,
                    "media": element_media,
                    "content": {
                        "mimetype": "application/x-ldt-structured",
                        "title": element_title,
                        "description": element_description,
                        "color": element_color,
                        "audio": {
                            "src": element_audio_src,
                            "mimetype": "audio/mp3",
                            "href": element_audio_href,
                        },
                    },
                    "tags": element_tags,
                    "meta": {
                        "id-ref": decoupage_id,
                        "dc:creator": decoupage_creator,
                        "dc:contributor": decoupage_contributor,
                        "dc:created": decoupage_created,
                        "dc:modified": decoupage_modified,
                    },
                }

                self.annotations_dict[element_id] = new_annotation
                self.annotations_by_annotation_types[decoupage_id].append(new_annotation)

        if not list_items:
            new_list["items"] = None
        self.lists_dict[ensemble_id] = new_list

    def __parse_ldt(self):
        """
        Parse the project's ldt XML and fill every *_dict attribute.

        Steps: parse the views (when from_display is set), then the medias,
        then the annotations. With from_display set and at least one view
        found, annotation types and annotations are finally reordered to
        follow the order of the cuttings in the display.
        """
        self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))

        if self.from_display:
            if isinstance(self.from_display, basestring):
                # The id is passed as an XPath variable instead of being
                # spliced into the expression, so quotes in it are harmless.
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[@id=$display_id]",
                    display_id=self.from_display)
            else:
                display_nodes = self.ldt_doc.xpath("/iri/displays/display[position()=1]")
            self.__parse_views(display_nodes)

        for media_node in self.ldt_doc.xpath("/iri/medias/media"):
            iri_id = media_node.attrib[u"id"]
            if self.from_display and iri_id not in self.display_contents_list:
                continue
            content = Content.objects.get(iri_id=iri_id)
            self.__parse_content(content)

        for content_node in self.ldt_doc.xpath("/iri/annotations/content"):
            content_id = content_node.attrib[u"id"]
            if self.from_display and content_id not in self.display_contents_list:
                continue
            content = Content.objects.get(iri_id=content_id)
            for ensemble_node in content_node:
                if ensemble_node.tag != "ensemble":
                    continue
                ensemble_id = ensemble_node.get("id")
                if self.from_display and ensemble_id not in self.display_ensemble_list:
                    continue
                self.__parse_ensemble(ensemble_node, content)

        # reorder annotations and annotation type from view
        if self.from_display and len(self.views_dict) > 0:
            new_annotation_types_dict = SortedDict()
            new_annotations_dict = SortedDict()
            for annotation_type in self.display_cuttings_list:
                if annotation_type in self.annotation_types_dict:
                    new_annotation_types_dict[annotation_type] = self.annotation_types_dict[annotation_type]
                    for annot in self.annotations_by_annotation_types.get(annotation_type, []):
                        new_annotations_dict[annot['id']] = annot

            self.annotations_dict = new_annotations_dict
            self.annotation_types_dict = new_annotation_types_dict

        self.parsed = True

    def __parse_content(self, content):
        """
        Register ``content`` as a media and optionally parse its own ensembles.

        The media description is stored in medias_dict under content.iri_id.
        When serialize_contents is set, every /iri/body/ensembles/ensemble of
        the content's iri file is passed to __parse_ensemble.
        """
        doc = lxml.etree.parse(content.iri_file_path())

        authors = content.authors.all()

        # First author is the creator, second one (if any) the contributor.
        if len(authors) > 0:
            author = authors[0].handle
        else:
            author = "IRI"

        if len(authors) > 1:
            contributor = authors[1].handle
        else:
            contributor = author

        content_author = ""
        author_values = doc.xpath("/iri/head/meta[@name='author']/@content")
        if author_values:
            content_author = author_values[0]

        content_date = ""
        date_values = doc.xpath("/iri/head/meta[@name='date']/@content")
        if date_values:
            content_date = date_values[0]

        href = ""
        meta_item_value = ""
        if content.videopath:
            meta_item_value = content.videopath.rstrip('/') + "/"
            href = meta_item_value + content.src

        new_media = {
            "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms": "o=0",
            "id": content.iri_id,
            "href": href,
            "unit": "ms",
            "origin": "0",
            "meta": {
                "dc:creator": author,
                "dc:created": content.creation_date.isoformat(),
                "dc:contributor": contributor,
                "dc:modified": content.update_date.isoformat(),
                "dc:creator.contents": content_author,
                "dc:created.contents": content_date,
                "dc:title": content.title,
                "dc:description": content.description,
                "dc:duration": content.get_duration(),
                "item": {
                    "name": "streamer",
                    "value": meta_item_value,
                },
            },
        }

        self.medias_dict[content.iri_id] = new_media

        if self.serialize_contents:
            for ensemble_node in doc.xpath("/iri/body/ensembles/ensemble"):
                self.__parse_ensemble(ensemble_node, content)

    def serialize_to_cinelab(self):
        """
        Return the project as a cinelab dictionary.

        Keys: 'meta', 'medias', 'lists', 'tags', 'views', 'annotation-types'
        and 'annotations'. Every collection key holds None instead of an
        empty collection. The ldt document is parsed on first use.
        """
        res = {}

        if not self.parsed:
            self.__parse_ldt()

        project_main_media = ""
        if len(self.medias_dict) > 0:
            project_main_media = self.medias_dict.value_for_index(0)["id"]

        res['meta'] = {
            'id': self.project.ldt_id,
            'dc:created': self.project.creation_date.isoformat(),
            'dc:modified': self.project.modification_date.isoformat(),
            'dc:contributor': self.project.changed_by,
            'dc:creator': self.project.created_by,
            'dc:title': self.project.title,
            'dc:description': self.project.get_description(self.ldt_doc),  # get from doc, parse ldt
            'main_media': {"id-ref": project_main_media},
        }

        res['medias'] = _values_or_none(self.medias_dict)
        res['lists'] = _values_or_none(self.lists_dict)
        # tags_dict holds the same tag objects as self.tags but keeps
        # insertion order, which makes the output deterministic.
        res['tags'] = _values_or_none(self.tags_dict)
        res['views'] = _values_or_none(self.views_dict)

        res['annotation-types'] = _values_or_none(self.annotation_types_dict)
        res['annotations'] = _values_or_none(self.annotations_dict)

        return res

    def getAnnotations(self, first_cutting=True):
        """
        Return the annotations as a flat list of dictionaries.

        Each entry has the keys 'begin', 'duration', 'title', 'desc', 'tags'
        (list of tag titles), 'id' and 'uri'. 'uri' is None unless the
        content's media object has an external publication url.

        first_cutting -- when true, stop at the first annotation belonging
                         to another cutting than the first one encountered.
        """
        if not self.parsed:
            self.__parse_ldt()

        annotations = []
        # One database lookup per content instead of one per annotation.
        contents_by_id = {}
        current_cutting = None

        for annot in self.annotations_dict.values():
            cutting_id = annot['meta']['id-ref']
            logging.debug("current cutting%r : annot %s", current_cutting, cutting_id)
            if first_cutting and current_cutting and current_cutting != cutting_id:
                break
            current_cutting = cutting_id

            content_id = annot['media']
            if content_id not in contents_by_id:
                contents_by_id[content_id] = Content.objects.get(iri_id=content_id)
            content = contents_by_id[content_id]

            # annot['tags'] is None when the annotation has no tag.
            tags_list = [
                self.tags_dict[tag_entry['id-ref']]['meta']['dc:title']
                for tag_entry in (annot['tags'] or [])
            ]
            begin = int(annot['begin'])
            duration = int(annot['end']) - begin

            # Reset for every annotation so that an annotation without a
            # publication url never inherits the uri of the previous one.
            uri = None
            if content.media_obj and content.media_obj.external_publication_url:
                uri = "%s#t=%d" % (content.media_obj.external_publication_url, begin)

            annotations.append({
                'begin': begin,
                'duration': duration,
                'title': annot['content']['title'],
                'desc': annot['content']['description'],
                'tags': tags_list,
                'id': annot['id'],
                'uri': uri,
            })

        return annotations