web/ldt/ldt_utils/projectserializer.py
changeset 1 eb9188f2ee4f
equal deleted inserted replaced
0:85b071fb75b2 1:eb9188f2ee4f
       
     1 from datetime import datetime
       
     2 from django.utils.datastructures import SortedDict
       
     3 from ldt.ldt_utils.models import Content, Project
       
     4 import logging
       
     5 import lxml.etree
       
     6 import uuid
       
     7 
       
     8 DATE_FORMATS = ["%d/%m/%Y","%Y-%m-%d"]
       
     9 
       
    10 """
       
    11 Serialize a project object to a cinelab compatible array
       
    12 """
       
    13 class ProjectSerializer:
       
    14     
       
    15     def __init__(self, project, from_contents=True, from_display=True):
       
    16         self.project = project
       
    17         self.parsed = False
       
    18         self.ldt_doc = None
       
    19         self.medias_dict = SortedDict()
       
    20         self.annotations_dict = SortedDict()
       
    21         self.annotations_by_annotation_types = {}
       
    22         self.tags = {}
       
    23         self.tags_dict = SortedDict()
       
    24         self.annotation_types_dict = SortedDict()
       
    25         self.views_dict = SortedDict()
       
    26         self.lists_dict = SortedDict()
       
    27         self.serialize_contents = from_contents
       
    28         self.from_display = from_display
       
    29         self.display_contents_list = []
       
    30         self.display_cuttings_list = []
       
    31         self.display_ensemble_list = []
       
    32         
       
    33         
       
    34     def __parse_views(self, display_node_list):
       
    35         for display_node in display_node_list:
       
    36             display_id = display_node.get(u"id", None)
       
    37             if not display_id:
       
    38                 continue
       
    39             content_list = []
       
    40             cuttings_list = []
       
    41             new_display = {
       
    42                 "id": display_id,
       
    43                 "contents": content_list,
       
    44                 "annotation_types": cuttings_list,
       
    45             }
       
    46             
       
    47             for content_node in display_node.xpath("content"):
       
    48                 content_id = content_node.get("id")
       
    49                 if content_id not in content_list:
       
    50                     content_list.append(content_id)                    
       
    51                 if content_id not in self.display_contents_list:
       
    52                     self.display_contents_list.append(content_id)
       
    53                 for cutting_node  in content_node.xpath("decoupage"):
       
    54                     cutting_id = cutting_node.get("id")
       
    55                     if cutting_id not in cuttings_list:
       
    56                         cuttings_list.append(cutting_id)
       
    57                     if cutting_id not in self.display_cuttings_list:
       
    58                         self.display_cuttings_list.append(cutting_id)
       
    59                     ensemble_id = cutting_node.get("idens")
       
    60                     if ensemble_id not in self.display_ensemble_list:
       
    61                         self.display_ensemble_list.append(ensemble_id)
       
    62             self.views_dict[display_id] = new_display
       
    63                     
       
    64         
       
    65     
       
    66     def __parse_ensemble(self, ensemble_node, content):
       
    67         
       
    68         ensemble_id = ensemble_node.attrib[u"id"]
       
    69         ensemble_author = ensemble_node.attrib[u"author"]
       
    70         ensemble_title = ensemble_node.attrib[u"title"]
       
    71         ensemble_description = ensemble_node.attrib[u"abstract"]
       
    72         ensemble_created = datetime.utcnow().isoformat()
       
    73         ensemble_modified = ensemble_created 
       
    74         
       
    75         list_items = []
       
    76         new_list = {
       
    77             "id" : ensemble_id,
       
    78             "items" : list_items,
       
    79             "meta" : {
       
    80                 "dc:creator":ensemble_author,
       
    81                 "dc:created": ensemble_created,
       
    82                 "dc:contributor":"undefined",
       
    83                 "dc:modified": ensemble_modified,
       
    84                 "dc:title":ensemble_title,
       
    85                 "dc:description": ensemble_description,
       
    86                 "id-ref":content.iri_id,
       
    87                 "editable":"false"
       
    88             }
       
    89         }
       
    90         
       
    91         
       
    92         for decoupage_node in ensemble_node:
       
    93             if decoupage_node.tag != "decoupage" :
       
    94                 continue
       
    95             
       
    96             decoupage_id = decoupage_node.attrib[ u"id"]
       
    97             if self.from_display and decoupage_id not in self.display_cuttings_list:
       
    98                 continue
       
    99             decoupage_creator = decoupage_node.attrib[u"author"]
       
   100             if not decoupage_creator:
       
   101                 decoupage_creator = "IRI"
       
   102             decoupage_contributor = decoupage_creator
       
   103             date_str = decoupage_node.get(u"date")
       
   104             decoupage_created = None
       
   105             if date_str :
       
   106                 for date_format in DATE_FORMATS:
       
   107                     try:
       
   108                         decoupage_created = datetime.strptime(date_str,date_format).isoformat()
       
   109                         break
       
   110                     except Exception:
       
   111                         decoupage_created = None
       
   112             if decoupage_created is None:
       
   113                 decoupage_created = datetime.utcnow().isoformat()
       
   114             decoupage_modified = decoupage_created
       
   115             
       
   116             decoupage_title = ""
       
   117             for txtRes in decoupage_node.xpath("title/text()", smart_strings=False): 
       
   118                     decoupage_title += txtRes
       
   119 
       
   120             decoupage_description = ""
       
   121             for txtRes in decoupage_node.xpath("abstract/text()", smart_strings=False): 
       
   122                     decoupage_description += txtRes
       
   123 
       
   124             
       
   125             list_items.append({"id-ref":decoupage_id})
       
   126             
       
   127             new_annotation_types = {
       
   128                 "id":decoupage_id,
       
   129                 "dc:creator":decoupage_creator,
       
   130                 "dc:created":decoupage_created,
       
   131                 "dc:contributor":decoupage_contributor,
       
   132                 "dc:modified":decoupage_modified,
       
   133                 "dc:title":decoupage_title,
       
   134                 "dc:description":decoupage_description
       
   135             }
       
   136             
       
   137             self.annotation_types_dict[decoupage_id] = new_annotation_types
       
   138             self.annotations_by_annotation_types[decoupage_id] = []
       
   139                         
       
   140             res = decoupage_node.xpath("elements/element")
       
   141             for element_node in res:
       
   142                 
       
   143                 element_id = element_node.attrib[u"id"]
       
   144                 element_begin = element_node.attrib[u"begin"]
       
   145                 element_duration = element_node.attrib[u"dur"]
       
   146                 element_media = content.iri_id
       
   147                 element_color = element_node.attrib[u"color"]
       
   148                 
       
   149                 element_title = ""
       
   150                 for txtRes in element_node.xpath("title/text()", smart_strings=False): 
       
   151                     element_title += txtRes
       
   152         
       
   153                 element_description = ""
       
   154                 for txtRes in element_node.xpath("abstract/text()", smart_strings=False): 
       
   155                     element_description += txtRes
       
   156                 
       
   157                 element_audio_src = ""
       
   158                 element_audio_href = ""
       
   159                 res = element_node.xpath("audio")
       
   160                 if len(res) > 0:
       
   161                     element_audio_src = res[0].get(u"source",u"")
       
   162                     element_audio_href =  res[0].text                
       
   163                 
       
   164                 element_tags = []
       
   165                 
       
   166                 tags = element_node.get(u"tags",u"")
       
   167                 
       
   168                 tags_list = map(lambda s:s.strip(),tags.split(","))
       
   169 
       
   170                 #tags                                
       
   171                 if tags is None or len(tags) == 0:
       
   172                     tags_list = []
       
   173                     restagnode = element_node.xpath("tag/text()", smart_strings=False)
       
   174                     for tagnode in restagnode:
       
   175                         tags_list.append(tagnode)
       
   176                         
       
   177                 if tags_list is None or len(tags_list) == 0:
       
   178                     tags_list = []
       
   179                     restagnode = element_node.xpath("tags/tag/text()", smart_strings=False)
       
   180                     for tagnode in restagnode:
       
   181                         tags_list.append(tagnode)
       
   182                 
       
   183                 tag_date = datetime.utcnow().isoformat()
       
   184                 for tag_title in tags_list:
       
   185                     if tag_title not in self.tags:
       
   186                         tag_id = unicode(uuid.uuid1())
       
   187                         new_tag = {
       
   188                             "id":tag_id,
       
   189                             "meta" : {
       
   190                                 "dc:creator":"IRI",
       
   191                                 "dc:created": tag_date,
       
   192                                 "dc:contributor":"IRI",
       
   193                                 "dc:modified": tag_date,
       
   194                                 "dc:title":tag_title
       
   195                             }
       
   196                         }
       
   197                         self.tags[tag_title] = new_tag
       
   198                         self.tags_dict[tag_id] = new_tag
       
   199                     element_tags.append({"id-ref":tag_id})
       
   200 
       
   201                 if not element_tags:
       
   202                     element_tags = None
       
   203                     
       
   204                 new_annotation = {
       
   205                     "begin": element_begin,
       
   206                     "end": int(element_begin) + int(element_duration),
       
   207                     "id": element_id,
       
   208                     "media": element_media,
       
   209                     "content": {
       
   210                         "mimetype": "application/x-ldt-structured",
       
   211                         "title": element_title,
       
   212                         "description": element_description,
       
   213                         "color": element_color,
       
   214                         "audio": {
       
   215                             "src" : element_audio_src,
       
   216                             "mimetype": "audio/mp3",
       
   217                             "href": element_audio_href
       
   218                         },
       
   219                     },
       
   220                     "tags": element_tags,
       
   221                     "meta": {
       
   222                         "id-ref": decoupage_id,
       
   223                         "dc:creator": decoupage_creator,
       
   224                         "dc:contributor": decoupage_contributor,
       
   225                         "dc:created": decoupage_created,
       
   226                         "dc:modified": decoupage_modified
       
   227                     }
       
   228                 }
       
   229                 
       
   230                 self.annotations_dict[element_id] = new_annotation
       
   231                 self.annotations_by_annotation_types[decoupage_id].append(new_annotation)
       
   232         
       
   233         if not list_items:
       
   234             new_list["items"] = None
       
   235         self.lists_dict[ensemble_id] = new_list
       
   236 
       
   237 
       
   238     def __parse_ldt(self):
       
   239         
       
   240         self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))
       
   241         
       
   242         if self.from_display:
       
   243             xpath_str = "/iri/displays/display[position()=1]"
       
   244             if isinstance(self.from_display, basestring):
       
   245                 xpath_str = "/iri/displays/display[@id='%s']" % self.from_display
       
   246             
       
   247             self.__parse_views(self.ldt_doc.xpath(xpath_str))
       
   248         
       
   249         res = self.ldt_doc.xpath("/iri/medias/media")
       
   250         for mediaNode in res:
       
   251             iri_id = mediaNode.attrib[u"id"]
       
   252             if self.from_display and iri_id not in self.display_contents_list:
       
   253                 continue
       
   254             content = Content.objects.get(iri_id=iri_id)
       
   255             self.__parse_content(content)
       
   256             
       
   257         res = self.ldt_doc.xpath("/iri/annotations/content")
       
   258         for content_node in res:
       
   259             content_id = content_node.attrib[u"id"]
       
   260             if self.from_display and content_id not in self.display_contents_list:
       
   261                 continue
       
   262             content = Content.objects.get(iri_id=content_id)
       
   263             for ensemble_node in content_node:
       
   264                 if ensemble_node.tag != "ensemble" :
       
   265                     continue
       
   266                 ensemble_id = ensemble_node.get("id")
       
   267                 if self.from_display and ensemble_id not in self.display_ensemble_list:
       
   268                     continue
       
   269                 self.__parse_ensemble(ensemble_node, content)            
       
   270         
       
   271         #reorder annotations and annotation type from view
       
   272         if self.from_display and len(self.views_dict) > 0:
       
   273             new_annotation_types_dict = SortedDict()
       
   274             new_annotations_dict = SortedDict()
       
   275             for annotation_type in self.display_cuttings_list:
       
   276                 if annotation_type in self.annotation_types_dict:
       
   277                     new_annotation_types_dict[annotation_type] = self.annotation_types_dict[annotation_type]
       
   278                     for annot in self.annotations_by_annotation_types[annotation_type]:
       
   279                         new_annotations_dict[annot['id']] = annot
       
   280                     
       
   281             self.annotations_dict = new_annotations_dict
       
   282             self.annotation_types_dict = new_annotation_types_dict
       
   283         
       
   284         self.parsed = True
       
   285     
       
   286     def __parse_content(self, content):
       
   287         
       
   288         doc = lxml.etree.parse(content.iri_file_path())
       
   289         
       
   290         authors = content.authors.all()
       
   291         
       
   292         if len(authors) > 0 :
       
   293             author = authors[0].handle
       
   294         else :
       
   295             author = "IRI"
       
   296         
       
   297         if len(authors) > 1 :
       
   298             contributor = authors[1].handle
       
   299         else :
       
   300             contributor = author
       
   301         
       
   302         content_author = ""
       
   303         
       
   304         res = doc.xpath("/iri/head/meta[@name='author']/@content")
       
   305         if len(res) > 0:
       
   306             content_author = res[0]
       
   307         
       
   308         
       
   309         content_date = ""
       
   310         
       
   311         res = doc.xpath("/iri/head/meta[@name='date']/@content")
       
   312         if len(res) > 0:
       
   313             content_date = res[0]
       
   314 
       
   315         href = ""
       
   316         meta_item_value = ""
       
   317         if content.videopath:
       
   318             href = content.videopath.rstrip('/') + "/" + content.src
       
   319             meta_item_value = content.videopath.rstrip('/') + "/"
       
   320 
       
   321         new_media = {
       
   322              "http://advene.liris.cnrs.fr/ns/frame_of_reference/ms" : "o=0",
       
   323              "id" : content.iri_id,
       
   324              "href" : href,
       
   325              "unit" : "ms",
       
   326              "origin" : "0",
       
   327              "meta": {
       
   328                  "dc:creator" : author,
       
   329                  "dc:created" : content.creation_date.isoformat(),
       
   330                  "dc:contributor" : contributor,
       
   331                  "dc:modified" : content.update_date.isoformat(),
       
   332                  "dc:creator.contents" : content_author,
       
   333                  "dc:created.contents" : content_date,
       
   334                  "dc:title" : content.title,
       
   335                  "dc:description" : content.description,
       
   336                  "dc:duration" : content.get_duration(),
       
   337                  "item": {
       
   338                      "name" : "streamer",
       
   339                      "value": meta_item_value, 
       
   340                  },
       
   341              }
       
   342         }
       
   343         
       
   344         self.medias_dict[content.iri_id] = new_media
       
   345         
       
   346         if self.serialize_contents:        
       
   347             res = doc.xpath("/iri/body/ensembles/ensemble")
       
   348             for ensemble_node in res:
       
   349                 self.__parse_ensemble(ensemble_node, content)
       
   350 
       
   351     
       
   352     def serialize_to_cinelab(self):
       
   353     
       
   354         res = {}
       
   355         
       
   356         if not self.parsed:
       
   357             self.__parse_ldt()    
       
   358 
       
   359         
       
   360         project_main_media = ""
       
   361         if len(self.medias_dict) > 0:
       
   362             project_main_media = self.medias_dict.value_for_index(0)["id"]
       
   363         
       
   364         res['meta'] = {
       
   365              'id': self.project.ldt_id,
       
   366              'dc:created':self.project.creation_date.isoformat(),
       
   367              'dc:modified':self.project.modification_date.isoformat(),
       
   368              'dc:contributor':self.project.changed_by,
       
   369              'dc:creator':self.project.created_by,
       
   370              'dc:title':self.project.title,
       
   371              'dc:description':self.project.get_description(self.ldt_doc), # get from doc, parse ldt
       
   372              'main_media': {"id-ref":project_main_media}
       
   373             }
       
   374                 
       
   375                     
       
   376         res['medias'] =  self.medias_dict.values() if len(self.medias_dict) > 0 else None
       
   377         res['lists'] = self.lists_dict.values() if len(self.lists_dict) > 0 else None
       
   378         res['tags'] = self.tags.values() if len(self.tags) > 0 else None
       
   379         res['views'] = self.views_dict.values() if len(self.views_dict) > 0 else None
       
   380         
       
   381         res['annotation-types'] = self.annotation_types_dict.values() if len(self.annotation_types_dict) > 0 else None
       
   382         res['annotations'] = self.annotations_dict.values() if len(self.annotations_dict) > 0 else None 
       
   383 
       
   384         
       
   385 
       
   386         return res
       
   387     
       
   388     def getAnnotations(self, first_cutting=True):
       
   389         
       
   390         if not self.parsed:
       
   391             self.__parse_ldt()
       
   392         
       
   393         annotations = []
       
   394         
       
   395         current_cutting = None
       
   396         uri = None
       
   397         for annot in self.annotations_dict.values():
       
   398             logging.debug("current cutting" + repr(current_cutting) + " : annot " + annot['meta']['id-ref'])
       
   399             if first_cutting and current_cutting and current_cutting != annot['meta']['id-ref'] :
       
   400                 break
       
   401             current_cutting = annot['meta']['id-ref']
       
   402             content_id = annot['media']
       
   403             content = Content.objects.get(iri_id=content_id)
       
   404             if annot['tags']:
       
   405                 tags_list = map(lambda tag_entry: self.tags_dict[tag_entry['id-ref']]['meta']['dc:title'],annot['tags'])
       
   406             else:
       
   407                 tags_list = []
       
   408             begin = int(annot['begin'])
       
   409             duration = int(annot['end'])-begin
       
   410             if content.media_obj and content.media_obj.external_publication_url:
       
   411                 uri = "%s#t=%d" % (content.media_obj.external_publication_url, begin)
       
   412 
       
   413         
       
   414             annotations.append({
       
   415                 'begin': begin,
       
   416                 'duration':duration,
       
   417                 'title':annot['content']['title'],
       
   418                 'desc':annot['content']['description'],
       
   419                 'tags': tags_list,
       
   420                 'id':annot['id'],
       
   421                 'uri':uri
       
   422             })
       
   423             
       
   424         return annotations
       
   425 
       
   426