web/ldt/ldt_utils/projectserializer.py
author ymh <ymh.work@gmail.com>
Fri, 11 Jun 2010 00:17:30 +0200
changeset 27 f81da251d0aa
parent 26 d5b6908e4da5
permissions -rw-r--r--
improve serialization and add partials

from datetime import datetime
import xml.dom
import xml.dom.minidom

import Ft.Xml.Domlette
import xml.xpath

from ldt.ldt_utils.models import Content, Project

# Date layouts tried, in this order, when parsing the "date" attribute of a
# decoupage node.
DATE_FORMATS = [
    "%d/%m/%Y",
    "%Y-%m-%d",
]

# The ProjectSerializer class below serializes a project object to a
# cinelab compatible array.
class ProjectSerializer:
    """Serialize a project object to a cinelab-compatible dictionary.

    The project's ``ldt`` XML is parsed together with the iri file of every
    media it references. Intermediate results are accumulated on the
    instance (``medias``, ``annotations``, ``annotation_types``, ``tags``,
    ``lists``, ``views``) and assembled by ``serialize_to_cinelab``.
    """

    def __init__(self, project):
        """Store ``project`` and start with empty accumulators."""
        self.project = project
        self.__reset_state()

    def __reset_state(self):
        """(Re)initialise every accumulator filled while parsing."""
        self.ldt_doc = None
        self.medias = []
        self.annotations = []
        self.tags = {}
        self.annotation_types = []
        self.views = []
        self.lists = []

    @staticmethod
    def _xpath_text(expression, node):
        """Concatenate ``data`` of every node matched by ``expression``.

        ``expression`` is evaluated with ``xml.xpath.Evaluate`` relative to
        ``node``; an empty string is returned when nothing matches.
        """
        return "".join(
            [match.data for match in xml.xpath.Evaluate(expression, node)])

    @staticmethod
    def _direct_text(node):
        """Join the data of the text and CDATA children of ``node``."""
        text_types = (xml.dom.Node.TEXT_NODE, xml.dom.Node.CDATA_SECTION_NODE)
        return "".join(
            [child.data for child in node.childNodes
             if child.nodeType in text_types])

    @staticmethod
    def _split_tags(tags):
        """Split a comma separated tag string into stripped, non-empty ids.

        ``None`` and the empty string both give an empty list.
        """
        if not tags:
            return []
        stripped = [part.strip() for part in tags.split(",")]
        return [part for part in stripped if part]

    @staticmethod
    def _parse_date(date_str):
        """Return ``date_str`` as an ISO string, or None if it cannot be read.

        Every layout of ``DATE_FORMATS`` is tried in order.
        """
        if not date_str:
            return None
        for date_format in DATE_FORMATS:
            try:
                return datetime.strptime(date_str, date_format).isoformat()
            except (ValueError, TypeError):
                # Layout does not match: try the next one.
                continue
        return None

    def __element_tag_ids(self, element_node):
        """Return the tag ids attached to ``element_node``.

        The ``tags`` attribute is used first; when it yields nothing the
        ``tag`` children, then the ``tags/tag`` children, are used instead.
        """
        tag_ids = self._split_tags(element_node.getAttributeNS(None, u"tags"))
        for expression in ("tag/text()", "tags/tag/text()"):
            if tag_ids:
                break
            tag_ids = [
                tag_node.data
                for tag_node in xml.xpath.Evaluate(expression, element_node)]
        return tag_ids

    def __tag_refs(self, tag_ids):
        """Register unknown tags in ``self.tags`` and return their references.

        Returns a list of ``{"id-ref": tag_id}`` dictionaries, or None when
        ``tag_ids`` is empty.
        """
        tag_date = datetime.utcnow().isoformat()
        refs = []
        for tag_id in tag_ids:
            if tag_id not in self.tags:
                self.tags[tag_id] = {
                    "id": tag_id,
                    "meta": {
                        "dc:creator": "IRI",
                        "dc:created": tag_date,
                        "dc:contributor": "IRI",
                        "dc:modified": tag_date,
                        "dc:title": tag_id
                    }
                }
            refs.append({"id-ref": tag_id})
        return refs or None

    def __parse_element(self, element_node, content, annotation_meta):
        """Append the annotation described by ``element_node``.

        ``annotation_meta`` is the metadata shared by every annotation of
        the enclosing decoupage; it is copied for each annotation.
        Raises ValueError if the ``begin`` or ``dur`` attribute is not an
        integer string.
        """
        element_begin = element_node.getAttributeNS(None, u"begin")
        element_duration = element_node.getAttributeNS(None, u"dur")

        element_audio_src = ""
        element_audio_href = ""
        audio_nodes = xml.xpath.Evaluate("audio", element_node)
        if len(audio_nodes) > 0:
            # Only the first audio child is taken into account.
            element_audio_src = audio_nodes[0].getAttributeNS(None, u"source")
            element_audio_href = self._direct_text(audio_nodes[0])

        element_tags = self.__tag_refs(self.__element_tag_ids(element_node))

        self.annotations.append({
            # "begin" is kept as found in the document, "end" is computed.
            "begin": element_begin,
            "end": int(element_begin) + int(element_duration),
            "id": element_node.getAttributeNS(None, u"id"),
            "media": content.iri_id,
            "content": {
                "mimetype": "application/x-ldt-structured",
                "title": self._xpath_text("title/text()", element_node),
                "description": self._xpath_text(
                    "abstract/text()", element_node),
                "color": element_node.getAttributeNS(None, u"color"),
                "audio": {
                    "src": element_audio_src,
                    "mimetype": "audio/mp3",
                    "href": element_audio_href
                },
            },
            "tags": element_tags,
            "meta": dict(annotation_meta)
        })

    def __parse_ensemble(self, ensemble_node, content):
        """Parse one ``ensemble`` node attached to ``content``.

        Appends one entry to ``self.lists`` for the ensemble, one entry to
        ``self.annotation_types`` per ``decoupage`` child and one entry to
        ``self.annotations`` per ``elements/element`` of each decoupage.
        """
        # The ensemble carries no date: the current UTC time is used.
        ensemble_created = datetime.utcnow().isoformat()

        list_items = []
        new_list = {
            "id": ensemble_node.getAttributeNS(None, u"id"),
            "items": list_items,
            "meta": {
                "dc:creator": ensemble_node.getAttributeNS(None, u"author"),
                "dc:created": ensemble_created,
                "dc:contributor": "undefined",
                "dc:modified": ensemble_created,
                "dc:title": ensemble_node.getAttributeNS(None, u"title"),
                "dc:description": ensemble_node.getAttributeNS(
                    None, u"abstract"),
                "id-ref": content.iri_id,
                "editable": "false"
            }
        }

        for decoupage_node in ensemble_node.childNodes:
            if (decoupage_node.nodeType != xml.dom.Node.ELEMENT_NODE
                    or decoupage_node.tagName != "decoupage"):
                continue

            decoupage_id = decoupage_node.getAttributeNS(None, u"id")
            decoupage_creator = decoupage_node.getAttributeNS(None, u"author")
            if not decoupage_creator:
                decoupage_creator = "IRI"

            decoupage_created = self._parse_date(
                decoupage_node.getAttributeNS(None, u"date"))
            if decoupage_created is None:
                # Missing or unreadable date: fall back to the current time.
                decoupage_created = datetime.utcnow().isoformat()

            list_items.append({"id-ref": decoupage_id})

            self.annotation_types.append({
                "id": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:contributor": decoupage_creator,
                "dc:modified": decoupage_created,
                "dc:title": self._xpath_text("title/text()", decoupage_node),
                "dc:description": self._xpath_text(
                    "abstract/text()", decoupage_node)
            })

            annotation_meta = {
                "id-ref": decoupage_id,
                "dc:creator": decoupage_creator,
                "dc:contributor": decoupage_creator,
                "dc:created": decoupage_created,
                "dc:modified": decoupage_created
            }
            element_nodes = xml.xpath.Evaluate(
                "elements/element", decoupage_node)
            for element_node in element_nodes:
                self.__parse_element(element_node, content, annotation_meta)

        if not list_items:
            new_list["items"] = None
        self.lists.append(new_list)

    def __parse_ldt(self):
        """Parse the project's ldt document and fill the accumulators.

        Every ``/iri/medias/media`` node is resolved to a ``Content`` (by
        ``iri_id``) and parsed, then every ``ensemble`` child of the
        ``/iri/annotations/content`` nodes is parsed.
        """
        doc = xml.dom.minidom.parseString(self.project.ldt.encode("utf-8"))
        self.ldt_doc = Ft.Xml.Domlette.ConvertDocument(doc)
        # NOTE(review): the XPath context is built on the minidom document,
        # not on the Domlette conversion kept in self.ldt_doc - confirm that
        # this is intended.
        con = xml.xpath.Context.Context(doc, 1, 1, None)

        for media_node in xml.xpath.Evaluate("/iri/medias/media", context=con):
            iri_id = media_node.getAttributeNS(None, u"id")
            content = Content.objects.get(iri_id=iri_id)
            self.__parse_content(content)

        content_nodes = xml.xpath.Evaluate(
            "/iri/annotations/content", context=con)
        for content_node in content_nodes:
            content_id = content_node.getAttributeNS(None, u"id")
            content = Content.objects.get(iri_id=content_id)
            for ensemble_node in content_node.childNodes:
                if (ensemble_node.nodeType != xml.dom.Node.ELEMENT_NODE
                        or ensemble_node.tagName != "ensemble"):
                    continue
                self.__parse_ensemble(ensemble_node, content)

        # TODO: parse /iri/displays/display (nothing fills self.views yet).

    def __parse_content(self, content):
        """Parse the iri file of ``content``.

        Appends the media description to ``self.medias`` and parses every
        ``/iri/body/ensembles/ensemble`` node of the file.
        """
        doc = Ft.Xml.Domlette.ConvertDocument(
            xml.dom.minidom.parse(content.iri_file_path()))
        con = xml.xpath.Context.Context(doc, 1, 1, None)

        authors = content.authors.all()

        # First author is the creator, second one (if any) the contributor.
        if len(authors) > 0:
            author = authors[0].handle
        else:
            author = "IRI"

        if len(authors) > 1:
            contributor = authors[1].handle
        else:
            contributor = author

        content_author = ""
        author_attrs = xml.xpath.Evaluate(
            "/iri/head/meta[@name='author']/@content", context=con)
        if len(author_attrs) > 0:
            content_author = author_attrs[0].value

        content_date = ""
        date_attrs = xml.xpath.Evaluate(
            "/iri/head/meta[@name='date']/@content", context=con)
        if len(date_attrs) > 0:
            content_date = date_attrs[0].value

        streamer = content.videopath.rstrip('/') + "/"

        self.medias.append({
            "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms": "o=0",
            "id": content.iri_id,
            "href": streamer + content.src,
            "unit": "ms",
            "origin": "0",
            "meta": {
                "dc:creator": author,
                "dc:created": content.creation_date.isoformat(),
                "dc:contributor": contributor,
                "dc:modified": content.update_date.isoformat(),
                "dc:creator.contents": content_author,
                "dc:created.contents": content_date,
                "dc:title": content.title,
                "dc:description": content.description,
                "dc:duration": content.get_duration(),
                "item": {
                    "name": "streamer",
                    "value": streamer
                },
            }
        })

        ensemble_nodes = xml.xpath.Evaluate(
            "/iri/body/ensembles/ensemble", context=con)
        for ensemble_node in ensemble_nodes:
            self.__parse_ensemble(ensemble_node, content)

    def serialize_to_cinelab(self):
        """Parse the project and return its cinelab representation.

        Returns a dictionary with the keys ``meta``, ``medias``,
        ``annotation-types``, ``annotations``, ``lists``, ``tags`` and
        ``views``. Every collection that ends up empty is given as None,
        and the matching instance attribute is set to None as well
        (``self.tags`` excepted).
        """
        # Start from a clean state so that a repeated call neither
        # duplicates entries nor trips over an attribute previously set to
        # None.
        self.__reset_state()

        res = {}

        self.__parse_ldt()

        project_main_media = ""
        if len(self.medias) > 0:
            project_main_media = self.medias[0]["id"]

        res['meta'] = {
            'id': self.project.ldt_id,
            'dc:created': self.project.creation_date.isoformat(),
            'dc:modified': self.project.modification_date.isoformat(),
            'dc:contributor': self.project.changed_by,
            'dc:creator': self.project.created_by,
            'dc:title': self.project.title,
            # The description is read from the parsed ldt document.
            'dc:description': self.project.get_description(self.ldt_doc),
            'main_media': {"id-ref": project_main_media}
        }

        if not self.medias:
            self.medias = None

        if not self.annotation_types:
            self.annotation_types = None

        if len(self.tags) == 0:
            tags = None
        else:
            tags = list(self.tags.values())

        if not self.lists:
            self.lists = None

        if not self.views:
            self.views = None

        if not self.annotations:
            self.annotations = None

        res['medias'] = self.medias
        res['annotation-types'] = self.annotation_types
        res['annotations'] = self.annotations
        res['lists'] = self.lists
        res['tags'] = tags
        res['views'] = self.views  # ignored for the moment

        return res