23 # return "<TweetExclude(id=%d)>" % (self.id) |
24 # return "<TweetExclude(id=%d)>" % (self.id) |
24 |
25 |
25 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/" |
26 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/" |
26 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" |
27 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" |
27 DEFAULT_ANNOTATION_CHANNEL = 'ANNOT' |
28 DEFAULT_ANNOTATION_CHANNEL = 'ANNOT' |
|
29 |
|
30 def parse_date(datestr): |
|
31 res = parse_date_raw(datestr) |
|
32 if res.tzinfo is None: |
|
33 res = res.replace(tzinfo=tzutc()) |
|
34 return res |
28 |
35 |
29 def get_logger(): |
36 def get_logger(): |
30 return logging.getLogger(__name__) |
37 return logging.getLogger(__name__) |
31 |
38 |
32 |
39 |
121 help="quiet", default=0) |
128 help="quiet", default=0) |
122 |
129 |
123 |
130 |
124 def get_options(): |
131 def get_options(): |
125 |
132 |
126 usage = "usage: %(prog)s [options]" |
133 |
127 |
134 parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC") |
128 parser = argparse.ArgumentParser(usage) |
|
129 |
135 |
130 parser.add_argument("-f", "--file", dest="filename", |
136 parser.add_argument("-f", "--file", dest="filename", |
131 help="write export to file", metavar="FILE", default="project.ldt") |
137 help="write export to file", metavar="FILE", default="project.ldt") |
132 parser.add_argument("-a", "--annot-url", dest="annot_url", |
138 parser.add_argument("-a", "--annot-url", dest="annot_url", |
133 help="annotation server url", metavar="ANNOT-URL", required=True) |
139 help="annotation server url", metavar="ANNOT-URL", required=True) |
196 time_params = { |
202 time_params = { |
197 'hours': int(parts[0]), |
203 'hours': int(parts[0]), |
198 'minutes': int(parts[1]), |
204 'minutes': int(parts[1]), |
199 'seconds': int(parts[2]) if len(parts)>2 else 0 |
205 'seconds': int(parts[2]) if len(parts)>2 else 0 |
200 } |
206 } |
201 return int(datetime.timedelta(**time_params).total_seconds()*1000) |
207 return int(round(datetime.timedelta(**time_params).total_seconds()*1000)) |
202 |
208 |
203 def build_annotation_iterator(url, params, headers): |
209 def build_annotation_iterator(url, params, headers): |
204 page = 0 |
210 page = 0 |
205 page_nb = 1 |
211 page_nb = 1 |
206 while page < page_nb: |
212 while page < page_nb: |
299 start_date_str = params.get("start_date",None) |
305 start_date_str = params.get("start_date",None) |
300 end_date_str = params.get("end_date", None) |
306 end_date_str = params.get("end_date", None) |
301 duration = params.get("duration", None) |
307 duration = params.get("duration", None) |
302 content_file = params.get("content_file", None) |
308 content_file = params.get("content_file", None) |
303 content_file_write = params.get("content_file_write", None) |
309 content_file_write = params.get("content_file_write", None) |
304 channels = params.get('channels', [DEFAULT_ANNOTATION_CHANNEL]) |
310 channels = list(set(params.get('channels', [DEFAULT_ANNOTATION_CHANNEL]))) |
305 events = params.get('events', []) |
311 events = list(set(params.get('events', []))) |
306 |
312 |
307 if user_whitelist_file: |
313 if user_whitelist_file: |
308 with open(user_whitelist_file, 'r+') as f: |
314 with open(user_whitelist_file, 'r+') as f: |
309 user_whitelist = list(set([s.strip() for s in f])) |
315 user_whitelist = list(set([s.strip() for s in f])) |
310 |
316 |
311 start_date = None |
317 start_date = None |
312 ts = None |
|
313 if start_date_str: |
318 if start_date_str: |
314 start_date = parse_date(start_date_str) |
319 start_date= parse_date(start_date_str) |
315 ts = time.mktime(start_date.timetuple()) |
|
316 |
320 |
317 |
321 |
318 root = None |
322 root = None |
319 ensemble_parent = None |
323 ensemble_parent = None |
320 |
324 |
339 |
343 |
340 if root is None: |
344 if root is None: |
341 |
345 |
342 root = etree.Element(u"iri") |
346 root = etree.Element(u"iri") |
343 |
347 |
344 project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
348 project = etree.SubElement(root, u"project", {u"abstract":u"Annotations",u"title":u"Annotations", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
345 |
349 |
346 medias = etree.SubElement(root, u"medias") |
350 medias = etree.SubElement(root, u"medias") |
347 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
351 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
348 |
352 |
349 annotations = etree.SubElement(root, u"annotations") |
353 annotations = etree.SubElement(root, u"annotations") |
459 |
463 |
460 params = { 'q':json.dumps({'filters':filters}), 'results_per_page': options.batch_size} |
464 params = { 'q':json.dumps({'filters':filters}), 'results_per_page': options.batch_size} |
461 |
465 |
462 |
466 |
463 for annot in build_annotation_iterator(annotation_url, params, headers): |
467 for annot in build_annotation_iterator(annotation_url, params, headers): |
464 #TODO : check timezone !!! |
468 annot_ts = parse_date(annot['ts']) |
465 annot_ts_dt = parse_date(annot['ts']) |
469 if start_date is None: |
466 annot_ts = int(time.mktime(annot_ts_dt.timetuple())) |
470 star_date = annot_ts |
467 if ts is None: |
471 annot_ts_rel = annot_ts-start_date |
468 ts = annot_ts |
472 annot_ts_rel_milli = int(round(annot_ts_rel.total_seconds()*1000)) |
469 annot_ts_rel = (annot_ts-ts) * 1000 |
|
470 if deltas: |
473 if deltas: |
471 d = find_delta(deltas, annot_ts_rel) |
474 d = find_delta(deltas, annot_ts_rel_milli) |
472 if d[1] < 0: |
475 if d[1] < 0: |
473 continue |
476 continue |
474 else : |
477 else : |
475 annot_ts_rel -= d[1] |
478 annot_ts_rel_milli -= d[1] |
476 annot_content = annot.get('content',{'category':'', 'user':None}) |
479 annot_content = annot.get('content',{'category':'', 'user':None}) |
477 |
480 |
478 username = annot_content.get('user', 'anon.') or 'anon.' |
481 username = annot_content.get('user', 'anon.') or 'anon.' |
479 |
482 |
480 category = annot_content.get('category', None) |
483 category = annot_content.get('category', None) |
481 if category is None: |
484 if category is None: |
482 continue |
485 continue |
483 |
486 |
484 element = etree.SubElement(elements, u"element" , {u"id":annot.get('uuid', uuid.uuid4()), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(annot_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(annot_ts_rel), u"dur":u"0"}) |
487 element = etree.SubElement(elements, u"element" , {u"id":annot.get('uuid', uuid.uuid4()), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(annot_ts.strftime("%Y/%m/%d")), u"begin": unicode(annot_ts_rel_milli), u"dur":u"0"}) |
485 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(category.get('label', category.get('code', ''))) |
488 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(category.get('label', category.get('code', ''))) |
486 etree.SubElement(element, u"abstract").text = unicode(category.get('label', category.get('code', ''))) |
489 etree.SubElement(element, u"abstract").text = unicode(category.get('label', category.get('code', ''))) |
487 |
490 |
488 tags_node = etree.SubElement(element, u"tags") |
491 tags_node = etree.SubElement(element, u"tags") |
489 etree.SubElement(tags_node,u"tag").text = category.get('code', '') |
492 etree.SubElement(tags_node,u"tag").text = category.get('code', '') |