web/ldt/ldt_utils/projectserializer.py
changeset 98 c9460033138f
parent 97 66f6aff5c382
child 99 0fb4b009c6eb
equal deleted inserted replaced
97:66f6aff5c382 98:c9460033138f
     1 import lxml.etree
       
     2 import uuid
       
     3 from datetime import datetime
     1 from datetime import datetime
     4 from django.utils.datastructures import SortedDict
     2 from django.utils.datastructures import SortedDict
     5 from ldt.ldt_utils.models import Content, Project
     3 from ldt.ldt_utils.models import Content, Project
       
     4 import logging
       
     5 import lxml.etree
       
     6 import uuid
     6 
     7 
     7 DATE_FORMATS = ["%d/%m/%Y","%Y-%m-%d"]
     8 DATE_FORMATS = ["%d/%m/%Y","%Y-%m-%d"]
     8 
     9 
     9 """
    10 """
    10 Serialize a project object to a cinelab compatible array
    11 Serialize a project object to a cinelab compatible array
    13     
    14     
    def __init__(self, project, from_contents=True, from_display=True):
        """
        Prepare an unparsed serializer for ``project``.

        ``from_contents`` is kept as ``self.serialize_contents``.
        ``from_display`` is kept as-is: when truthy, parsing is restricted to
        what a display references (a string selects the display with that id,
        any other truthy value selects the first display).
        """
        # Source object and options.
        self.project = project
        self.serialize_contents = from_contents
        self.from_display = from_display

        # Parsing state: nothing has been read yet.
        self.parsed = False
        self.ldt_doc = None

        # Ids referenced by the selected display(s); filled while parsing views.
        self.display_contents_list = []
        self.display_cuttings_list = []
        self.display_ensemble_list = []

        # Insertion-ordered result containers.
        self.medias_dict = SortedDict()            # keyed by content iri_id
        self.annotation_types_dict = SortedDict()  # keyed by decoupage id
        self.annotations_dict = SortedDict()       # keyed by element id
        self.lists_dict = SortedDict()             # keyed by ensemble id
        self.views_dict = SortedDict()             # keyed by display id
        self.tags_dict = SortedDict()              # keyed by tag id
        self.tags = {}                             # keyed by tag title
       
    31         
       
    32         
       
    def __parse_views(self, display_node_list):
        """
        Record what the given display nodes reference.

        Each node must offer ``get(name, default)`` and ``xpath(expr)``.
        Displays without a (non-empty) ``id`` attribute are ignored. For every
        other display an entry is stored in ``self.views_dict`` under the
        display id, listing the ids of its ``content`` children ("contents")
        and of their ``decoupage`` children ("annotation_types").

        The same ids, plus each decoupage's ``idens`` attribute, are also
        accumulated without duplicates in ``self.display_contents_list``,
        ``self.display_cuttings_list`` and ``self.display_ensemble_list``.

        Attributes that are absent (``None``) are skipped instead of being
        stored, so the view entries and the filter lists never contain
        ``None``.
        """
        def remember(value, *targets):
            # Append value to every target list that does not hold it yet;
            # a missing attribute (None) is never recorded.
            if value is None:
                return
            for target in targets:
                if value not in target:
                    target.append(value)

        for display_node in display_node_list:
            display_id = display_node.get(u"id", None)
            if not display_id:
                continue

            content_list = []
            cuttings_list = []

            for content_node in display_node.xpath("content"):
                remember(content_node.get("id"),
                         content_list, self.display_contents_list)
                for cutting_node in content_node.xpath("decoupage"):
                    remember(cutting_node.get("id"),
                             cuttings_list, self.display_cuttings_list)
                    remember(cutting_node.get("idens"),
                             self.display_ensemble_list)

            self.views_dict[display_id] = {
                "id": display_id,
                "contents": content_list,
                "annotation_types": cuttings_list,
            }
       
    62                     
    35         
    63         
    36     
    64     
    37     def __parse_ensemble(self, ensemble_node, content):
    65     def __parse_ensemble(self, ensemble_node, content):
    38         
    66         
    39         ensemble_id = ensemble_node.attrib[u"id"]
    67         ensemble_id = ensemble_node.attrib[u"id"]
    63         for decoupage_node in ensemble_node:
    91         for decoupage_node in ensemble_node:
    64             if decoupage_node.tag != "decoupage" :
    92             if decoupage_node.tag != "decoupage" :
    65                 continue
    93                 continue
    66             
    94             
    67             decoupage_id = decoupage_node.attrib[ u"id"]
    95             decoupage_id = decoupage_node.attrib[ u"id"]
       
    96             if self.from_display and decoupage_id not in self.display_cuttings_list:
       
    97                 continue
    68             decoupage_creator = decoupage_node.attrib[u"author"]
    98             decoupage_creator = decoupage_node.attrib[u"author"]
    69             if not decoupage_creator:
    99             if not decoupage_creator:
    70                 decoupage_creator = "IRI"
   100                 decoupage_creator = "IRI"
    71             decoupage_contributor = decoupage_creator
   101             decoupage_contributor = decoupage_creator
    72             date_str = decoupage_node.get(u"date")
   102             date_str = decoupage_node.get(u"date")
    87                     decoupage_title += txtRes
   117                     decoupage_title += txtRes
    88 
   118 
    89             decoupage_description = ""
   119             decoupage_description = ""
    90             for txtRes in decoupage_node.xpath("abstract/text()", smart_strings=False): 
   120             for txtRes in decoupage_node.xpath("abstract/text()", smart_strings=False): 
    91                     decoupage_description += txtRes
   121                     decoupage_description += txtRes
    92             
       
    93 
   122 
    94             
   123             
    95             list_items.append({"id-ref":decoupage_id})
   124             list_items.append({"id-ref":decoupage_id})
    96             
   125             
    97             new_annotation_types = {
   126             new_annotation_types = {
   102                 "dc:modified":decoupage_modified,
   131                 "dc:modified":decoupage_modified,
   103                 "dc:title":decoupage_title,
   132                 "dc:title":decoupage_title,
   104                 "dc:description":decoupage_description
   133                 "dc:description":decoupage_description
   105             }
   134             }
   106             
   135             
   107             self.annotation_types.append(new_annotation_types)
   136             self.annotation_types_dict[decoupage_id] = new_annotation_types
   108             self.annotation_types_by_id[decoupage_id] = new_annotation_types
       
   109             annotations_list = []
       
   110             
       
   111             self.annotations_by_annotation_type[decoupage_id] = annotations_list
       
   112                         
   137                         
   113             res = decoupage_node.xpath("elements/element")
   138             res = decoupage_node.xpath("elements/element")
   114             for element_node in res:
   139             for element_node in res:
   115                 
   140                 
   116                 element_id = element_node.attrib[u"id"]
   141                 element_id = element_node.attrib[u"id"]
   166                                 "dc:modified": tag_date,
   191                                 "dc:modified": tag_date,
   167                                 "dc:title":tag_title
   192                                 "dc:title":tag_title
   168                             }
   193                             }
   169                         }
   194                         }
   170                         self.tags[tag_title] = new_tag
   195                         self.tags[tag_title] = new_tag
   171                         self.tags_by_id[tag_id] = new_tag
   196                         self.tags_dict[tag_id] = new_tag
   172                     element_tags.append({"id-ref":tag_id})
   197                     element_tags.append({"id-ref":tag_id})
   173 
   198 
   174                 if not element_tags:
   199                 if not element_tags:
   175                     element_tags = None
   200                     element_tags = None
   176                     
   201                     
   198                         "dc:created": decoupage_created,
   223                         "dc:created": decoupage_created,
   199                         "dc:modified": decoupage_modified
   224                         "dc:modified": decoupage_modified
   200                     }
   225                     }
   201                 }
   226                 }
   202                 
   227                 
   203                 self.annotations.append(new_annotation)
   228                 self.annotations_dict[element_id] = new_annotation
   204                 annotations_list.append(new_annotation)
       
   205         
   229         
   206         if not list_items:
   230         if not list_items:
   207             new_list["items"] = None
   231             new_list["items"] = None
   208         self.lists.append(new_list)
   232         self.lists_dict[ensemble_id] = new_list
   209         self.lists_by_id[ensemble_id] = new_list
       
   210 
   233 
   211 
   234 
   212     def __parse_ldt(self):
   235     def __parse_ldt(self):
   213         
   236         
   214         self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))
   237         self.ldt_doc = lxml.etree.fromstring(self.project.ldt.encode("utf-8"))
       
   238         
       
   239         if self.from_display:
       
   240             xpath_str = "/iri/displays/display[position()=1]"
       
   241             if isinstance(self.from_display, basestring):
       
   242                 xpath_str = "/iri/displays/display[@id='%s']" % self.from_display
       
   243             
       
   244             logging.debug("xpath_str " + xpath_str)
       
   245             self.__parse_views(self.ldt_doc.xpath(xpath_str))
       
   246             logging.debug("xpath_str " + repr(self.views_dict))
   215         
   247         
   216         res = self.ldt_doc.xpath("/iri/medias/media")
   248         res = self.ldt_doc.xpath("/iri/medias/media")
   217         for mediaNode in res:
   249         for mediaNode in res:
   218             iri_id = mediaNode.attrib[u"id"]
   250             iri_id = mediaNode.attrib[u"id"]
       
   251             if self.from_display and iri_id not in self.display_contents_list:
       
   252                 continue
   219             content = Content.objects.get(iri_id=iri_id)
   253             content = Content.objects.get(iri_id=iri_id)
   220             self.__parse_content(content)
   254             self.__parse_content(content)
   221             
   255             
   222         res = self.ldt_doc.xpath("/iri/annotations/content")
   256         res = self.ldt_doc.xpath("/iri/annotations/content")
   223         for content_node in res:
   257         for content_node in res:
   224             content_id = content_node.attrib[u"id"]
   258             content_id = content_node.attrib[u"id"]
       
   259             if self.from_display and content_id not in self.display_contents_list:
       
   260                 continue
   225             content = Content.objects.get(iri_id=content_id)
   261             content = Content.objects.get(iri_id=content_id)
   226             for ensemble_node in content_node:
   262             for ensemble_node in content_node:
   227                 if ensemble_node.tag != "ensemble" :
   263                 if ensemble_node.tag != "ensemble" :
   228                     continue
   264                     continue
       
   265                 ensemble_id = ensemble_node.get("id")
       
   266                 if self.from_display and ensemble_id not in self.display_ensemble_list:
       
   267                     continue
   229                 self.__parse_ensemble(ensemble_node, content)            
   268                 self.__parse_ensemble(ensemble_node, content)            
   230 
       
   231         if self.from_display :
       
   232             annotations = []
       
   233             annotation_types = []
       
   234             ensembles = []
       
   235             medias = []
       
   236             tags = []
       
   237             xpath_str = "/iri/displays/display[position()=1]/content"
       
   238             if isinstance(self.from_display, basestring):
       
   239                 xpath_str = "/iri/displays/display[id='%s']/content" % self.from_display
       
   240                 
       
   241             for content_node in self.ldt_doc.xpath(xpath_str):
       
   242                 content_id = content_node.get("id")
       
   243                 if content_id not in medias:
       
   244                     medias.append(content_id)
       
   245                 for node in content_node.xpath("decoupage"):
       
   246                     annotation_type_id = node.get('id')
       
   247                     ensemble_id = node.get('idens')
       
   248                     if annotation_type_id in self.annotations_by_annotation_type:
       
   249                         annot_list = self.annotations_by_annotation_type[annotation_type_id]                    
       
   250                         annotations.extend(annot_list)
       
   251                         for annot in annot_list:
       
   252                             if annot['tags']:
       
   253                                 for tag in annot['tags']:
       
   254                                     tag_id = tag['id-ref']
       
   255                                     if tag_id not in tags:
       
   256                                         tags.append(tag_id)                    
       
   257                         if annotation_type_id not in annotation_types:
       
   258                             annotation_types.append(annotation_type_id)
       
   259                         if ensemble_id not in ensembles:
       
   260                             ensembles.append(ensemble_id)
       
   261             
       
   262             self.annotations = annotations
       
   263             self.annotation_types = map(lambda id: self.annotation_types_by_id[id], annotation_types)
       
   264             self.lists = map(lambda id: self.lists_by_id[id], ensembles)
       
   265             self.medias = map(lambda id: self.medias_by_id[id], medias)
       
   266             self.tags = {}
       
   267             for tag_id in tags:
       
   268                 tag_inst = self.tags_by_id[tag_id]
       
   269                 self.tags[tag_inst['meta']['dc:title']] = tag_inst
       
   270         
   269         
   271         self.parsed = True
   270         self.parsed = True
   272     
   271     
   273     def __parse_content(self, content):
   272     def __parse_content(self, content):
   274         
   273         
   321                      "value": content.videopath.rstrip('/') + "/"
   320                      "value": content.videopath.rstrip('/') + "/"
   322                  },
   321                  },
   323              }
   322              }
   324         }
   323         }
   325         
   324         
   326         self.medias.append(new_media)
       
   327         self.medias_by_id[content.iri_id] = new_media
       
   328         self.medias_dict[content.iri_id] = new_media
   325         self.medias_dict[content.iri_id] = new_media
   329         
   326         
   330         if self.serialize_contents:        
   327         if self.serialize_contents:        
   331             res = doc.xpath("/iri/body/ensembles/ensemble")
   328             res = doc.xpath("/iri/body/ensembles/ensemble")
   332             for ensemble_node in res:
   329             for ensemble_node in res:
   337     
   334     
   338         res = {}
   335         res = {}
   339         
   336         
   340         if not self.parsed:
   337         if not self.parsed:
   341             self.__parse_ldt()    
   338             self.__parse_ldt()    
       
   339 
   342         
   340         
   343         project_main_media = ""
   341         project_main_media = ""
   344         if len(self.medias) > 0:
   342         if len(self.medias_dict) > 0:
   345             project_main_media = self.medias[0]["id"]
   343             project_main_media = self.medias_dict.value_for_index(0)["id"]
   346         
   344         
   347         res['meta'] = {
   345         res['meta'] = {
   348              'id': self.project.ldt_id,
   346              'id': self.project.ldt_id,
   349              'dc:created':self.project.creation_date.isoformat(),
   347              'dc:created':self.project.creation_date.isoformat(),
   350              'dc:modified':self.project.modification_date.isoformat(),
   348              'dc:modified':self.project.modification_date.isoformat(),
   352              'dc:creator':self.project.created_by,
   350              'dc:creator':self.project.created_by,
   353              'dc:title':self.project.title,
   351              'dc:title':self.project.title,
   354              'dc:description':self.project.get_description(self.ldt_doc), # get from doc, parse ldt
   352              'dc:description':self.project.get_description(self.ldt_doc), # get from doc, parse ldt
   355              'main_media': {"id-ref":project_main_media}
   353              'main_media': {"id-ref":project_main_media}
   356             }
   354             }
   357         
   355                 
   358         if not self.medias:
   356                     
   359             self.medias = None
   357         res['medias'] =  self.medias_dict.values() if len(self.medias_dict) > 0 else None
   360                         
   358         res['annotation-types'] = self.annotation_types_dict.values() if len(self.annotation_types_dict) > 0 else None
   361         if not self.annotation_types:
   359         res['annotations'] = self.annotations_dict.values() if len(self.annotations_dict) > 0 else None 
   362             self.annotation_types = None
   360         res['lists'] = self.lists_dict.values() if len(self.lists_dict) > 0 else None
   363         
   361         res['tags'] = self.tags.values() if len(self.tags) > 0 else None
   364         if len(self.tags) == 0:
   362         res['views'] = self.views_dict.values() if len(self.views_dict) > 0 else None
   365             tags = None
   363         
   366         else:
   364 
   367             tags = self.tags.values()
       
   368             
       
   369         if not self.lists:
       
   370             self.lists = None
       
   371             
       
   372         if not self.views:
       
   373             self.views = None
       
   374             
       
   375         if not self.annotations:
       
   376             self.annotations = None
       
   377         
       
   378         res['medias'] =  self.medias
       
   379         res['annotation-types'] = self.annotation_types
       
   380         res['annotations'] = self.annotations
       
   381         res['lists'] = self.lists
       
   382         res['tags'] = tags
       
   383         res['views'] = self.views # ignored for the moment
       
   384         
       
   385         return res
   365         return res
   386     
   366     
   387     def getAnnotations(self, first_cutting=True):
   367     def getAnnotations(self, first_cutting=True):
   388         
   368         
   389         if not self.parsed:
   369         if not self.parsed:
   391         
   371         
   392         annotations = []
   372         annotations = []
   393         
   373         
   394         current_cutting = None
   374         current_cutting = None
   395         uri = None
   375         uri = None
   396         for annot in self.annotations:
   376         for annot in self.annotations_dict.values():
   397             if first_cutting and current_cutting and current_cuttings != annot['meta']['id-ref'] :
   377             if first_cutting and current_cutting and current_cuttings != annot['meta']['id-ref'] :
   398                 break
   378                 break
   399             current_cuttings = annot['meta']['id-ref']
   379             current_cuttings = annot['meta']['id-ref']
   400             content_id = annot['media']
   380             content_id = annot['media']
   401             content = Content.objects.get(iri_id=content_id)
   381             content = Content.objects.get(iri_id=content_id)
   402             if annot['tags']:
   382             if annot['tags']:
   403                 tags_list = map(lambda tag_entry: self.tags_by_id[tag_entry['id-ref']]['meta']['dc:title'],annot['tags'])
   383                 tags_list = map(lambda tag_entry: self.tags_dict[tag_entry['id-ref']]['meta']['dc:title'],annot['tags'])
   404             else:
   384             else:
   405                 tags_list = []
   385                 tags_list = []
   406             begin = int(annot['begin'])
   386             begin = int(annot['begin'])
   407             duration = int(annot['end'])-begin
   387             duration = int(annot['end'])-begin
   408             if content.media_obj and content.media_obj.external_publication_url:
   388             if content.media_obj and content.media_obj.external_publication_url: