diff -r 5007c248fbad -r c2294ac6e875 utils/export_annotations.py --- a/utils/export_annotations.py Thu Oct 16 12:59:29 2014 +0200 +++ b/utils/export_annotations.py Thu Oct 16 16:23:06 2014 +0200 @@ -11,7 +11,8 @@ import sys import time import uuid #@UnresolvedImport -from dateutil.parser import parse as parse_date +from dateutil.parser import parse as parse_date_raw +from dateutil.tz import tzutc import bisect import logging @@ -26,6 +27,12 @@ LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" DEFAULT_ANNOTATION_CHANNEL = 'ANNOT' +def parse_date(datestr): + res = parse_date_raw(datestr) + if res.tzinfo is None: + res = res.replace(tzinfo=tzutc()) + return res + def get_logger(): return logging.getLogger(__name__) @@ -123,9 +130,8 @@ def get_options(): - usage = "usage: %(prog)s [options]" - parser = argparse.ArgumentParser(usage) + parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC") parser.add_argument("-f", "--file", dest="filename", help="write export to file", metavar="FILE", default="project.ldt") @@ -198,7 +204,7 @@ 'minutes': int(parts[1]), 'seconds': int(parts[2]) if len(parts)>2 else 0 } - return int(datetime.timedelta(**time_params).total_seconds()*1000) + return int(round(datetime.timedelta(**time_params).total_seconds()*1000)) def build_annotation_iterator(url, params, headers): page = 0 @@ -301,18 +307,16 @@ duration = params.get("duration", None) content_file = params.get("content_file", None) content_file_write = params.get("content_file_write", None) - channels = params.get('channels', [DEFAULT_ANNOTATION_CHANNEL]) - events = params.get('events', []) + channels = list(set(params.get('channels', [DEFAULT_ANNOTATION_CHANNEL]))) + events = list(set(params.get('events', []))) if user_whitelist_file: with open(user_whitelist_file, 'r+') as f: user_whitelist = list(set([s.strip() for s in f])) start_date = None - ts = None if start_date_str: - start_date = 
parse_date(start_date_str) - ts = time.mktime(start_date.timetuple()) + start_date= parse_date(start_date_str) root = None @@ -341,7 +345,7 @@ root = etree.Element(u"iri") - project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) + project = etree.SubElement(root, u"project", {u"abstract":u"Annotations",u"title":u"Annotations", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) medias = etree.SubElement(root, u"medias") media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) @@ -461,18 +465,17 @@ for annot in build_annotation_iterator(annotation_url, params, headers): - #TODO : check timezone !!! - annot_ts_dt = parse_date(annot['ts']) - annot_ts = int(time.mktime(annot_ts_dt.timetuple())) - if ts is None: - ts = annot_ts - annot_ts_rel = (annot_ts-ts) * 1000 + annot_ts = parse_date(annot['ts']) + if start_date is None: + start_date = annot_ts + annot_ts_rel = annot_ts-start_date + annot_ts_rel_milli = int(round(annot_ts_rel.total_seconds()*1000)) if deltas: - d = find_delta(deltas, annot_ts_rel) + d = find_delta(deltas, annot_ts_rel_milli) if d[1] < 0: continue else : - annot_ts_rel -= d[1] + annot_ts_rel_milli -= d[1] annot_content = annot.get('content',{'category':'', 'user':None}) username = annot_content.get('user', 'anon.') or 'anon.' 
@@ -481,7 +484,7 @@ if category is None: continue - element = etree.SubElement(elements, u"element" , {u"id":annot.get('uuid', uuid.uuid4()), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(annot_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(annot_ts_rel), u"dur":u"0"}) + element = etree.SubElement(elements, u"element" , {u"id":annot.get('uuid', uuid.uuid4()), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(annot_ts.strftime("%Y/%m/%d")), u"begin": unicode(annot_ts_rel_milli), u"dur":u"0"}) etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(category.get('label', category.get('code', ''))) etree.SubElement(element, u"abstract").text = unicode(category.get('label', category.get('code', '')))