script/utils/create_chap_from_csv.py
changeset 1542 82b5f22448f6
equal deleted inserted replaced
1541:61423ca4e0af 1542:82b5f22448f6
       
     1 #!/usr/bin/env python
       
     2 # coding=utf-8
       
     3 
       
     4 import argparse
       
     5 import datetime
       
     6 import json
       
     7 import os.path
       
     8 import re
       
     9 import sys
       
    10 import uuid  # @UnresolvedImport
       
    11 import csv
       
    12 from dateutil.parser import parse
       
    13 
       
    14 import requests
       
    15 
       
    16 import dateutil.tz
       
    17 from iri_tweet.utils import get_logger, set_logging, set_logging_options
       
    18 from lxml import etree
       
    19 
       
    20 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/"
       
    21 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/"
       
    22 
       
    23 def get_options():
       
    24 
       
    25     parser = argparse.ArgumentParser(description="All date should be given using iso8601 format.")
       
    26 
       
    27     parser.add_argument("-f", "--file", dest="filename",
       
    28                       help="write export to file", metavar="FILE", default="project.ldt")
       
    29     parser.add_argument("-d", "--csv-file", dest="csv_file",
       
    30                       help="Input chap file", metavar="CSV_FILE")
       
    31     parser.add_argument("-I", "--content-file", dest="content_file",
       
    32                       help="Content file", metavar="CONTENT_FILE")
       
    33     parser.add_argument("-c", "--content", dest="content",
       
    34                       help="Content url", metavar="CONTENT")
       
    35     parser.add_argument("-V", "--video-url", dest="video",
       
    36                       help="video url", metavar="VIDEO")
       
    37     parser.add_argument("-i", "--content-id", dest="content_id",
       
    38                       help="Content id", metavar="CONTENT_ID")
       
    39     parser.add_argument("-C", "--color", dest="color",
       
    40                       help="Color code", metavar="COLOR", default="16763904")
       
    41     parser.add_argument("-n", "--name", dest="name",
       
    42                       help="Cutting name", metavar="NAME", default="chap")
       
    43     parser.add_argument("-R", "--replace", dest="replace", action="store_true",
       
    44                       help="Replace ensemble", default=False)
       
    45     parser.add_argument("-m", "--merge", dest="merge", action="store_true",
       
    46                       help="merge ensemble, choose the first ensemble", default=False)
       
    47     parser.add_argument("-b", "--base-url", dest="base_url",
       
    48                       help="base URL of the platform", metavar="BASE_URL", default="http://ldt.iri.centrepompidou.fr/ldtplatform/")
       
    49     parser.add_argument("-p", "--project", dest="project_id",
       
    50                       help="Project id", metavar="PROJECT_ID", default=None)
       
    51     parser.add_argument("-P", "--post-param", dest="post_param",
       
    52                       help="Post param", metavar="POST_PARAM", default=None)
       
    53 
       
    54     set_logging_options(parser)
       
    55 
       
    56     return (parser.parse_args(), parser)
       
    57 
       
    58 
       
    59 if __name__ == "__main__" :
       
    60 
       
    61     (options, parser) = get_options()
       
    62 
       
    63     set_logging(options)
       
    64 
       
    65     get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
       
    66 
       
    67 
       
    68     if len(sys.argv) == 1 or options.csv_file is None or not options.csv_file.strip():
       
    69         parser.print_help()
       
    70         sys.exit(1)
       
    71 
       
    72     if options.project_id:
       
    73         content_file = options.base_url + LDT_PROJECT_REST_API_PATH + options.project_id + "/?format=json"
       
    74     else:
       
    75         content_file = options.content_file
       
    76     parameters = [{
       
    77         # 'start_date': options.start_date,
       
    78         # 'end_date' : options.end_date,
       
    79         # 'duration' : options.duration,
       
    80         'content_file' : content_file,
       
    81         'content_file_write' : content_file,
       
    82         # 'hashtags' : options.hashtag,
       
    83         'project_id' : options.project_id
       
    84     }]
       
    85     post_param = {}
       
    86     if options.post_param:
       
    87         post_param = json.loads(options.post_param)
       
    88 
       
    89     display_content_node = None
       
    90     for params in parameters:
       
    91 
       
    92         get_logger().debug("PARAMETERS " + repr(params)) #@UndefinedVariable
       
    93 
       
    94         content_file = params.get("content_file", None)
       
    95         content_file_write = params.get("content_file_write", None)
       
    96 
       
    97         root = None
       
    98         ensemble_parent = None
       
    99         project = None
       
   100 
       
   101         #to do : analyse situation ldt or iri ? filename set or not ?
       
   102 
       
   103         if content_file and content_file.find("http") == 0:
       
   104 
       
   105             get_logger().debug("url : " + content_file) #@UndefinedVariable
       
   106 
       
   107             r = requests.get(content_file, params=post_param)
       
   108             get_logger().debug("url response " + repr(r) + " content " + repr(r.text)) #@UndefinedVariable
       
   109             project = r.json()
       
   110             text_match = re.match(r"\<\?\s*xml.*?\?\>(.*)", project['ldt'], re.I|re.S)
       
   111             root = etree.fromstring(text_match.group(1) if text_match else project['ldt'])
       
   112 
       
   113         elif content_file and os.path.exists(content_file):
       
   114 
       
   115             doc = etree.parse(content_file)
       
   116             root = doc.getroot()
       
   117             for child in root:
       
   118                 if child.tag == "project":
       
   119                     project = child
       
   120                     break
       
   121             if project is None:
       
   122                 root = None
       
   123 
       
   124         content_id = None
       
   125 
       
   126         if root is None:
       
   127 
       
   128             root = etree.Element("iri")
       
   129 
       
   130             project = etree.SubElement(root, "project", {"abstract":"Chapitrage","title":"Chapitrage", "user":"IRI Web", "id":str(uuid.uuid4())})
       
   131 
       
   132             medias = etree.SubElement(root, "medias")
       
   133             media = etree.SubElement(medias, "media", {"pict":"", "src":options.content, "video":options.video, "id":options.content_id, "extra":""})
       
   134 
       
   135             annotations = etree.SubElement(root, "annotations")
       
   136             content = etree.SubElement(annotations, "content", {"id":options.content_id})
       
   137             ensemble_parent = content
       
   138 
       
   139             content_id = options.content_id
       
   140 
       
   141 
       
   142         if ensemble_parent is None:
       
   143             file_type = None
       
   144             for node in root:
       
   145                 if node.tag == "project":
       
   146                     file_type = "ldt"
       
   147                     break
       
   148                 elif node.tag == "head":
       
   149                     file_type = "iri"
       
   150                     break
       
   151 
       
   152             if file_type == "ldt":
       
   153                 media_nodes = root.xpath("//media")
       
   154                 media = None
       
   155                 if len(media_nodes) > 0:
       
   156                     media = media_nodes[0]
       
   157                 annotations_node = root.find("annotations")
       
   158                 if annotations_node is None:
       
   159                     annotations_node = etree.SubElement(root, "annotations")
       
   160                 content_node = annotations_node.find("content")
       
   161                 if content_node is None and media is not None:
       
   162                     content_node = etree.SubElement(annotations_node,"content", id=media.get("id"))
       
   163                 ensemble_parent = content_node
       
   164                 content_id = content_node.get("id")
       
   165                 display_nodes = root.xpath("//displays/display/content[@id='%s']" % content_id)
       
   166                 if len(display_nodes) == 0:
       
   167                     get_logger().info("No display node found. Will not update display")
       
   168                     display_content_node = None
       
   169                 else:
       
   170                     display_content_node = display_nodes[0]
       
   171 
       
   172             elif file_type == "iri":
       
   173                 body_node = root.find("body")
       
   174                 if body_node is None:
       
   175                     body_node = etree.SubElement(root, "body")
       
   176                 ensembles_node = body_node.find("ensembles")
       
   177                 if ensembles_node is None:
       
   178                     ensembles_node = etree.SubElement(body_node, "ensembles")
       
   179                 ensemble_parent = ensembles_node
       
   180                 content_id = root.xpath("head/meta[@name='id']/@content")[0]
       
   181                 display_content_node = None
       
   182 
       
   183 
       
   184         if ensemble_parent is None:
       
   185             get_logger().error("Can not process file") #@UndefinedVariable
       
   186             sys.exit()
       
   187 
       
   188         if options.replace:
       
   189             for ens in ensemble_parent.iterchildren(tag="ensemble"):
       
   190                 ens_id = ens.get("id","")
       
   191                 if ens_id.startswith("chap_"):
       
   192                     ensemble_parent.remove(ens)
       
   193                     # remove in display nodes
       
   194                     if display_content_node is not None:
       
   195                         for cut_display in display_content_node.iterchildren():
       
   196                             if cut_display.get('idens','') == ens_id:
       
   197                                 display_content_node.remove(cut_display)
       
   198 
       
   199         ensemble = None
       
   200         elements = None
       
   201         decoupage = None
       
   202 
       
   203         if options.merge:
       
   204             for ens in ensemble_parent.findall("ensemble"):
       
   205                 if ens.get('id',"").startswith("chap_"):
       
   206                     ensemble = ens
       
   207                     break
       
   208             if ensemble is not None:
       
   209                 elements = ensemble.find(".//elements")
       
   210                 decoupage = ensemble.find("decoupage")
       
   211 
       
   212         if ensemble is None or elements is None:
       
   213             ensemble = etree.SubElement(ensemble_parent, "ensemble", {"id":"chap_" + str(uuid.uuid4()), "title":"Ensemble Chapitrage", "author":"IRI Web", "abstract":"Ensemble Chapitrage"})
       
   214             decoupage = etree.SubElement(ensemble, "decoupage", {"id": str(uuid.uuid4()), "author": "IRI Web"})
       
   215 
       
   216             etree.SubElement(decoupage, "title").text = options.name
       
   217             etree.SubElement(decoupage, "abstract").text = options.name
       
   218 
       
   219             elements = etree.SubElement(decoupage, "elements")
       
   220 
       
   221         ensemble_id = ensemble.get('id', '')
       
   222         decoupage_id = decoupage.get('id', '') if decoupage is not None else None
       
   223 
       
   224         with open(options.csv_file.strip()) as csvfilein:
       
   225             chap_reader = csv.DictReader(csvfilein, delimiter=';')
       
   226             for i,chap_row in enumerate(chap_reader):
       
   227 
       
   228                 ts_start = chap_row['START']
       
   229                 dur = int(chap_row['END'])-int(ts_start)
       
   230                 username = "IRI"
       
   231                 color = "%s"%(int(chap_row['COLOR'].strip("#").lower(),16)) if chap_row['COLOR'] else options.color
       
   232                 title = chap_row['TITLE']
       
   233                 desc = chap_row['DESCRIPTION']
       
   234                 tags = chap_row['TAGS']
       
   235 
       
   236                 element = etree.SubElement(elements, "element" , {"id": "%s-%s" % (uuid.uuid4(),i), "color":color, "author":username, "date":datetime.datetime.now().strftime("%Y/%m/%d"), "begin": ts_start, "dur":str(dur), "src":"manual"})
       
   237                 etree.SubElement(element, "title").text = title[:255]
       
   238                 etree.SubElement(element, "abstract").text = desc
       
   239 
       
   240                 tags_node = etree.SubElement(element, "tags")
       
   241 
       
   242                 for tag in tags.split(","):
       
   243                     etree.SubElement(tags_node,"tag").text = tag.strip()
       
   244 
       
   245                 meta_element = etree.SubElement(element, 'meta')
       
   246 
       
   247                 etree.SubElement(meta_element, "source", attrib={"url":"http://www.iri.centrepompidou.fr", "mimetype":"text/plain"}).text = etree.CDATA(json.dumps({'row': chap_row}))
       
   248 
       
   249         # sort by tc in
       
   250         if options.merge :
       
   251             # remove all elements and put them in a array
       
   252             # sort them with tc
       
   253             #put them back
       
   254             elements[:] = sorted(elements,key=lambda n: int(n.get('begin')))
       
   255 
       
   256         #add to display node
       
   257         if display_content_node is not None:
       
   258             display_dec = None
       
   259             for dec in display_content_node.iterchildren(tag="decoupage"):
       
   260                 if dec.get('idens','') == ensemble_id and dec.get('id', '') == decoupage_id:
       
   261                     display_dec = dec
       
   262                     break
       
   263             if display_dec is None and ensemble_id and decoupage_id:
       
   264                 etree.SubElement(display_content_node, "decoupage", attrib={'idens': ensemble_id, 'id': decoupage_id, 'tagsSelect':''})
       
   265 
       
   266         output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=False, xml_declaration=True).decode('utf-8')
       
   267 
       
   268         if content_file_write and content_file_write.find("http") == 0:
       
   269 
       
   270             project["ldt"] = output_data
       
   271             project['owner'] = project['owner'].replace('%7E','~')
       
   272             project['contents'] = [c_url.replace('%7E','~') for c_url in project['contents']]
       
   273 
       
   274             post_param = {}
       
   275             if options.post_param:
       
   276                 post_param = json.loads(options.post_param)
       
   277 
       
   278             get_logger().debug("write http " + content_file_write) #@UndefinedVariable
       
   279             get_logger().debug("write http " + repr(post_param)) #@UndefinedVariable
       
   280             get_logger().debug("write http " + repr(project)) #@UndefinedVariable
       
   281             r = requests.put(content_file_write, data=json.dumps(project), headers={'content-type':'application/json'}, params=post_param)
       
   282             get_logger().debug("write http " + repr(r) + " content " + r.text) #@UndefinedVariable
       
   283             if r.status_code != requests.codes.ok:  # pylint: disable=E1101
       
   284                 r.raise_for_status()
       
   285         else:
       
   286             if content_file_write and os.path.exists(content_file_write):
       
   287                 dest_file_name = content_file_write
       
   288             else:
       
   289                 dest_file_name = options.filename
       
   290 
       
   291             get_logger().debug("WRITE : " + dest_file_name) #@UndefinedVariable
       
   292             output = open(dest_file_name, "w")
       
   293             output.write(output_data)
       
   294             output.flush()
       
   295             output.close()