# Reconstructed from the removed side of the changeset diff
# 3a30d255c235 -> 59311c28454f of web/ldt/ldt_utils/projectserializer.py
# (the diff deleted the whole file; line breaks and indentation were restored).
#
# NOTE: this module targets Python 2 (it relies on ``unicode`` and
# ``basestring``).
from datetime import datetime
from django.utils.datastructures import SortedDict
from ldt.ldt_utils.models import Content, Project
import logging
import lxml.etree
import uuid

# Date formats tried, in order, when reading the "date" attribute of a
# <decoupage> node.
DATE_FORMATS = ["%d/%m/%Y", "%Y-%m-%d"]


def _joined_text(node, path):
    """Return the concatenation of every text result of ``path`` under ``node``.

    Returns an empty string when the XPath expression matches nothing.
    """
    return "".join(node.xpath(path, smart_strings=False))


def _parse_creation_date(date_str):
    """Return ``date_str`` as an ISO 8601 string, or None if it cannot be read.

    Each format of DATE_FORMATS is tried in order; the first match wins.
    """
    if not date_str:
        return None
    for date_format in DATE_FORMATS:
        try:
            return datetime.strptime(date_str, date_format).isoformat()
        except ValueError:
            # Not this format: try the next one.
            continue
    return None


def _element_tag_titles(element_node):
    """Return the list of tag titles attached to an <element> node.

    Three sources are checked in order, the first non-empty one wins:
    the comma separated "tags" attribute, the <tag> children, then the
    <tags>/<tag> grand-children.
    """
    tags = element_node.get(u"tags", u"")
    if tags:
        return [title.strip() for title in tags.split(",")]

    titles = list(element_node.xpath("tag/text()", smart_strings=False))
    if not titles:
        titles = list(
            element_node.xpath("tags/tag/text()", smart_strings=False))
    return titles


class ProjectSerializer:
    """Serialize a project object to a cinelab compatible array.

    The project's ``ldt`` XML document is parsed lazily, on the first call to
    ``serialize_to_cinelab`` or ``getAnnotations``.

    Constructor arguments:
      project       -- object exposing at least ``ldt`` (XML text) plus the
                       metadata attributes read in ``serialize_to_cinelab``.
      from_contents -- when true, the ensembles stored in each content's own
                       iri file are serialized too.
      from_display  -- when true, only what the first <display> references is
                       serialized; when it is a string, the <display> with
                       that id is used instead.
    """

    def __init__(self, project, from_contents=True, from_display=True):
        self.project = project
        self.parsed = False
        self.ldt_doc = None
        self.medias_dict = SortedDict()
        self.annotations_dict = SortedDict()
        # annotation type id -> list of annotations, in document order
        self.annotations_by_annotation_types = {}
        # tag title -> tag
        self.tags = {}
        # tag id -> tag
        self.tags_dict = SortedDict()
        self.annotation_types_dict = SortedDict()
        self.views_dict = SortedDict()
        self.lists_dict = SortedDict()
        self.serialize_contents = from_contents
        self.from_display = from_display
        # ids referenced by the parsed display(s), in order of appearance
        self.display_contents_list = []
        self.display_cuttings_list = []
        self.display_ensemble_list = []

    def __parse_views(self, display_node_list):
        """Register each <display> node as a view and record what it references.

        Display nodes without an "id" attribute are ignored.
        """
        for display_node in display_node_list:
            display_id = display_node.get(u"id", None)
            if not display_id:
                continue

            content_list = []
            cuttings_list = []
            new_display = {
                "id": display_id,
                "contents": content_list,
                "annotation_types": cuttings_list,
            }

            for content_node in display_node.xpath("content"):
                content_id = content_node.get("id")
                if content_id not in content_list:
                    content_list.append(content_id)
                if content_id not in self.display_contents_list:
                    self.display_contents_list.append(content_id)

                for cutting_node in content_node.xpath("decoupage"):
                    cutting_id = cutting_node.get("id")
                    if cutting_id not in cuttings_list:
                        cuttings_list.append(cutting_id)
                    if cutting_id not in self.display_cuttings_list:
                        self.display_cuttings_list.append(cutting_id)

                    ensemble_id = cutting_node.get("idens")
                    if ensemble_id not in self.display_ensemble_list:
                        self.display_ensemble_list.append(ensemble_id)

            self.views_dict[display_id] = new_display

    def __parse_ensemble(self, ensemble_node, content):
        """Serialize one <ensemble> node attached to ``content``.

        Fills ``lists_dict`` (one list per ensemble), ``annotation_types_dict``
        (one type per <decoupage>), ``annotations_dict`` (one annotation per
        <element>) and the tag registries.

        Raises KeyError when a mandatory attribute ("id", "author", "title",
        "abstract" on the ensemble; "id", "author" on a decoupage; "id",
        "begin", "dur", "color" on an element) is missing.
        """
        ensemble_id = ensemble_node.attrib[u"id"]
        ensemble_author = ensemble_node.attrib[u"author"]
        ensemble_title = ensemble_node.attrib[u"title"]
        ensemble_description = ensemble_node.attrib[u"abstract"]
        # The ensemble node carries no date that is read here: "now" is used.
        ensemble_created = datetime.utcnow().isoformat()
        ensemble_modified = ensemble_created

        list_items = []
        new_list = {
            "id": ensemble_id,
            "items": list_items,
            "meta": {
                "dc:creator": ensemble_author,
                "dc:created": ensemble_created,
                "dc:contributor": "undefined",
                "dc:modified": ensemble_modified,
                "dc:title": ensemble_title,
                "dc:description": ensemble_description,
                "id-ref": content.iri_id,
                "editable": "false"
            }
        }

        for decoupage_node in ensemble_node:
            if decoupage_node.tag != "decoupage":
                continue

            decoupage_id = decoupage_node.attrib[u"id"]
            if (self.from_display
                    and decoupage_id not in self.display_cuttings_list):
                continue

            decoupage_creator = decoupage_node.attrib[u"author"]
            if not decoupage_creator:
                decoupage_creator = "IRI"
            decoupage_contributor = decoupage_creator

            decoupage_created = _parse_creation_date(
                decoupage_node.get(u"date"))
            if decoupage_created is None:
                # Missing or unreadable date: fall back on "now".
                decoupage_created = datetime.utcnow().isoformat()
            decoupage_modified = decoupage_created

            decoupage_title = _joined_text(decoupage_node, "title/text()")
            decoupage_description = _joined_text(
                decoupage_node, "abstract/text()")

            list_items.append({"id-ref": decoupage_id})

            new_annotation_types = {
                "id": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:contributor": decoupage_contributor,
                "dc:modified": decoupage_modified,
                "dc:title": decoupage_title,
                "dc:description": decoupage_description
            }

            self.annotation_types_dict[decoupage_id] = new_annotation_types
            self.annotations_by_annotation_types[decoupage_id] = []

            for element_node in decoupage_node.xpath("elements/element"):
                element_id = element_node.attrib[u"id"]
                element_begin = element_node.attrib[u"begin"]
                element_duration = element_node.attrib[u"dur"]
                element_media = content.iri_id
                element_color = element_node.attrib[u"color"]

                element_title = _joined_text(element_node, "title/text()")
                element_description = _joined_text(
                    element_node, "abstract/text()")

                element_audio_src = ""
                element_audio_href = ""
                audio_nodes = element_node.xpath("audio")
                if len(audio_nodes) > 0:
                    element_audio_src = audio_nodes[0].get(u"source", u"")
                    element_audio_href = audio_nodes[0].text

                element_tags = []
                tag_date = datetime.utcnow().isoformat()
                for tag_title in _element_tag_titles(element_node):
                    known_tag = self.tags.get(tag_title)
                    if known_tag is None:
                        tag_id = unicode(uuid.uuid1())
                        new_tag = {
                            "id": tag_id,
                            "meta": {
                                "dc:creator": "IRI",
                                "dc:created": tag_date,
                                "dc:contributor": "IRI",
                                "dc:modified": tag_date,
                                "dc:title": tag_title
                            }
                        }
                        self.tags[tag_title] = new_tag
                        self.tags_dict[tag_id] = new_tag
                    else:
                        # Tag already registered by a previous element:
                        # reference it through its own id.
                        tag_id = known_tag["id"]
                    element_tags.append({"id-ref": tag_id})

                if not element_tags:
                    element_tags = None

                new_annotation = {
                    "begin": element_begin,
                    "end": int(element_begin) + int(element_duration),
                    "id": element_id,
                    "media": element_media,
                    "content": {
                        "mimetype": "application/x-ldt-structured",
                        "title": element_title,
                        "description": element_description,
                        "color": element_color,
                        "audio": {
                            "src": element_audio_src,
                            "mimetype": "audio/mp3",
                            "href": element_audio_href
                        },
                    },
                    "tags": element_tags,
                    "meta": {
                        "id-ref": decoupage_id,
                        "dc:creator": decoupage_creator,
                        "dc:contributor": decoupage_contributor,
                        "dc:created": decoupage_created,
                        "dc:modified": decoupage_modified
                    }
                }

                self.annotations_dict[element_id] = new_annotation
                self.annotations_by_annotation_types[decoupage_id].append(
                    new_annotation)

        if not list_items:
            new_list["items"] = None
        self.lists_dict[ensemble_id] = new_list

    def __parse_ldt(self):
        """Parse the project's ldt document and fill every registry.

        Views are parsed first (when ``from_display`` is set) because they
        determine which medias, ensembles and cuttings are kept afterwards.
        """
        self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))

        if self.from_display:
            if isinstance(self.from_display, basestring):
                # The id is passed as an XPath variable so that quotes in it
                # cannot break (or alter) the expression.
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[@id=$display_id]",
                    display_id=self.from_display)
            else:
                display_nodes = self.ldt_doc.xpath(
                    "/iri/displays/display[position()=1]")
            self.__parse_views(display_nodes)

        for media_node in self.ldt_doc.xpath("/iri/medias/media"):
            iri_id = media_node.attrib[u"id"]
            if self.from_display and iri_id not in self.display_contents_list:
                continue
            content = Content.objects.get(iri_id=iri_id)
            self.__parse_content(content)

        for content_node in self.ldt_doc.xpath("/iri/annotations/content"):
            content_id = content_node.attrib[u"id"]
            if (self.from_display
                    and content_id not in self.display_contents_list):
                continue
            content = Content.objects.get(iri_id=content_id)
            for ensemble_node in content_node:
                if ensemble_node.tag != "ensemble":
                    continue
                ensemble_id = ensemble_node.get("id")
                if (self.from_display
                        and ensemble_id not in self.display_ensemble_list):
                    continue
                self.__parse_ensemble(ensemble_node, content)

        # reorder annotations and annotation type from view
        if self.from_display and len(self.views_dict) > 0:
            new_annotation_types_dict = SortedDict()
            new_annotations_dict = SortedDict()
            for annotation_type in self.display_cuttings_list:
                if annotation_type in self.annotation_types_dict:
                    new_annotation_types_dict[annotation_type] = \
                        self.annotation_types_dict[annotation_type]
                    annots = \
                        self.annotations_by_annotation_types[annotation_type]
                    for annot in annots:
                        new_annotations_dict[annot['id']] = annot

            self.annotations_dict = new_annotations_dict
            self.annotation_types_dict = new_annotation_types_dict

        self.parsed = True

    def __parse_content(self, content):
        """Register ``content`` as a media.

        When ``serialize_contents`` is set, the ensembles found in the
        content's iri file are serialized as well.
        """
        doc = lxml.etree.parse(content.iri_file_path())

        authors = content.authors.all()

        # First author is the creator, second one (if any) the contributor.
        if len(authors) > 0:
            author = authors[0].handle
        else:
            author = "IRI"

        if len(authors) > 1:
            contributor = authors[1].handle
        else:
            contributor = author

        content_author = ""
        author_meta = doc.xpath("/iri/head/meta[@name='author']/@content")
        if len(author_meta) > 0:
            content_author = author_meta[0]

        content_date = ""
        date_meta = doc.xpath("/iri/head/meta[@name='date']/@content")
        if len(date_meta) > 0:
            content_date = date_meta[0]

        href = ""
        meta_item_value = ""
        if content.videopath:
            meta_item_value = content.videopath.rstrip('/') + "/"
            href = meta_item_value + content.src

        new_media = {
            "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms": "o=0",
            "id": content.iri_id,
            "href": href,
            "unit": "ms",
            "origin": "0",
            "meta": {
                "dc:creator": author,
                "dc:created": content.creation_date.isoformat(),
                "dc:contributor": contributor,
                "dc:modified": content.update_date.isoformat(),
                "dc:creator.contents": content_author,
                "dc:created.contents": content_date,
                "dc:title": content.title,
                "dc:description": content.description,
                "dc:duration": content.get_duration(),
                "item": {
                    "name": "streamer",
                    "value": meta_item_value,
                },
            }
        }

        self.medias_dict[content.iri_id] = new_media

        if self.serialize_contents:
            for ensemble_node in doc.xpath("/iri/body/ensembles/ensemble"):
                self.__parse_ensemble(ensemble_node, content)

    def serialize_to_cinelab(self):
        """Return the project as a dict following the cinelab layout.

        The keys are 'meta', 'medias', 'lists', 'tags', 'views',
        'annotation-types' and 'annotations'; every collection that ends up
        empty is set to None instead of an empty list.
        """
        res = {}

        if not self.parsed:
            self.__parse_ldt()

        # The first registered media is the project's main media.
        project_main_media = ""
        if len(self.medias_dict) > 0:
            project_main_media = self.medias_dict.value_for_index(0)["id"]

        res['meta'] = {
            'id': self.project.ldt_id,
            'dc:created': self.project.creation_date.isoformat(),
            'dc:modified': self.project.modification_date.isoformat(),
            'dc:contributor': self.project.changed_by,
            'dc:creator': self.project.created_by,
            'dc:title': self.project.title,
            # get from doc, parse ldt
            'dc:description': self.project.get_description(self.ldt_doc),
            'main_media': {"id-ref": project_main_media}
        }

        res['medias'] = \
            self.medias_dict.values() if len(self.medias_dict) > 0 else None
        res['lists'] = \
            self.lists_dict.values() if len(self.lists_dict) > 0 else None
        res['tags'] = self.tags.values() if len(self.tags) > 0 else None
        res['views'] = \
            self.views_dict.values() if len(self.views_dict) > 0 else None

        res['annotation-types'] = (
            self.annotation_types_dict.values()
            if len(self.annotation_types_dict) > 0 else None)
        res['annotations'] = (
            self.annotations_dict.values()
            if len(self.annotations_dict) > 0 else None)

        return res

    def getAnnotations(self, first_cutting=True):
        """Return the annotations as a flat list of dicts.

        Each item has the keys 'begin', 'duration', 'title', 'desc', 'tags'
        (list of tag titles), 'id' and 'uri'. 'uri' is None when the
        annotation's content has no external publication url.

        When ``first_cutting`` is true, iteration stops as soon as an
        annotation belonging to another cutting than the first one is met.
        """
        if not self.parsed:
            self.__parse_ldt()

        annotations = []
        # Content objects already fetched during this call, by iri id.
        contents_by_id = {}

        current_cutting = None
        for annot in self.annotations_dict.values():
            cutting_id = annot['meta']['id-ref']
            logging.debug(
                "current cutting%r : annot %s", current_cutting, cutting_id)
            if (first_cutting and current_cutting
                    and current_cutting != cutting_id):
                break
            current_cutting = cutting_id

            content_id = annot['media']
            content = contents_by_id.get(content_id)
            if content is None:
                content = Content.objects.get(iri_id=content_id)
                contents_by_id[content_id] = content

            if annot['tags']:
                tags_list = [
                    self.tags_dict[tag_entry['id-ref']]['meta']['dc:title']
                    for tag_entry in annot['tags']
                ]
            else:
                tags_list = []

            begin = int(annot['begin'])
            duration = int(annot['end']) - begin

            # Reset for every annotation so that an annotation without a
            # publication url never inherits the uri of the previous one.
            uri = None
            if (content.media_obj
                    and content.media_obj.external_publication_url):
                uri = "%s#t=%d" % (
                    content.media_obj.external_publication_url, begin)

            annotations.append({
                'begin': begin,
                'duration': duration,
                'title': annot['content']['title'],
                'desc': annot['content']['description'],
                'tags': tags_list,
                'id': annot['id'],
                'uri': uri
            })

        return annotations