14 import os.path |
14 import os.path |
15 import re |
15 import re |
16 import sys |
16 import sys |
17 import time |
17 import time |
18 import uuid #@UnresolvedImport |
18 import uuid #@UnresolvedImport |
19 from dateutil.parser import parse as parse_date |
19 from dateutil.parser import parse as parse_date_raw |
|
20 from dateutil.tz import tzutc |
20 import bisect |
21 import bisect |
21 |
22 |
22 #class TweetExclude(object): |
23 #class TweetExclude(object): |
23 # def __init__(self, id): |
24 # def __init__(self, id): |
24 # self.id = id |
25 # self.id = id |
26 # def __repr__(self): |
27 # def __repr__(self): |
27 # return "<TweetExclude(id=%d)>" % (self.id) |
28 # return "<TweetExclude(id=%d)>" % (self.id) |
28 |
29 |
29 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/" |
30 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/" |
30 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" |
31 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" |
|
32 |
|
33 def parse_date(datestr): |
|
34 res = parse_date_raw(datestr) |
|
35 if res.tzinfo is None: |
|
36 res = res.replace(tzinfo=tzutc()) |
|
37 return res |
31 |
38 |
32 |
39 |
33 def re_fn(expr, item): |
40 def re_fn(expr, item): |
34 reg = re.compile(expr, re.I) |
41 reg = re.compile(expr, re.I) |
35 res = reg.search(item) |
42 res = reg.search(item) |
63 else: |
70 else: |
64 return None |
71 return None |
65 |
72 |
66 def get_options(): |
73 def get_options(): |
67 |
74 |
68 usage = "usage: %(prog)s [options]" |
75 parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC") |
69 |
|
70 parser = argparse.ArgumentParser(usage) |
|
71 |
76 |
72 parser.add_argument("-f", "--file", dest="filename", |
77 parser.add_argument("-f", "--file", dest="filename", |
73 help="write export to file", metavar="FILE", default="project.ldt") |
78 help="write export to file", metavar="FILE", default="project.ldt") |
74 parser.add_argument("-d", "--database", dest="database", |
79 parser.add_argument("-d", "--database", dest="database", |
75 help="Input database", metavar="DATABASE") |
80 help="Input database", metavar="DATABASE") |
136 time_params = { |
141 time_params = { |
137 'hours': int(parts[0]), |
142 'hours': int(parts[0]), |
138 'minutes': int(parts[1]), |
143 'minutes': int(parts[1]), |
139 'seconds': int(parts[2]) if len(parts)>2 else 0 |
144 'seconds': int(parts[2]) if len(parts)>2 else 0 |
140 } |
145 } |
141 return int(datetime.timedelta(**time_params).total_seconds()*1000) |
146 return int(round(datetime.timedelta(**time_params).total_seconds()*1000)) |
142 |
147 |
143 |
148 |
144 if __name__ == "__main__" : |
149 if __name__ == "__main__" : |
145 |
150 |
146 (options, parser) = get_options() |
151 (options, parser) = get_options() |
265 start_date_str = params.get("start_date",None) |
270 start_date_str = params.get("start_date",None) |
266 end_date_str = params.get("end_date", None) |
271 end_date_str = params.get("end_date", None) |
267 duration = params.get("duration", None) |
272 duration = params.get("duration", None) |
268 content_file = params.get("content_file", None) |
273 content_file = params.get("content_file", None) |
269 content_file_write = params.get("content_file_write", None) |
274 content_file_write = params.get("content_file_write", None) |
270 hashtags = params.get('hashtags', []) |
275 hashtags = list(set(params.get('hashtags', []))) |
271 |
276 |
272 if user_whitelist_file: |
277 if user_whitelist_file: |
273 with open(user_whitelist_file, 'r+') as f: |
278 with open(user_whitelist_file, 'r+') as f: |
274 user_whitelist = list(set([s.strip() for s in f])) |
279 user_whitelist = list(set([s.strip() for s in f])) |
275 |
280 |
276 start_date = None |
281 start_date = None |
277 ts = None |
|
278 if start_date_str: |
282 if start_date_str: |
279 start_date = parse_date(start_date_str) |
283 start_date = parse_date(start_date_str) |
280 ts = time.mktime(start_date.timetuple()) |
|
281 |
|
282 |
284 |
283 root = None |
285 root = None |
284 ensemble_parent = None |
286 ensemble_parent = None |
285 |
287 |
286 #to do : analyse situation ldt or iri ? filename set or not ? |
288 #to do : analyse situation ldt or iri ? filename set or not ? |
420 query_res = query.all() |
422 query_res = query.all() |
421 |
423 |
422 |
424 |
423 for tw in query_res: |
425 for tw in query_res: |
424 tweet_ts_dt = tw.created_at |
426 tweet_ts_dt = tw.created_at |
425 tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) |
427 if tweet_ts_dt.tzinfo is None: |
426 if ts is None: |
428 tweet_ts_dt = tweet_ts_dt.replace(tzinfo=tzutc()) |
427 ts = tweet_ts |
429 if start_date is None: |
428 tweet_ts_rel = (tweet_ts-ts) * 1000 |
430 start_date = tweet_ts_dt |
|
431 tweet_ts_rel = tweet_ts_dt-start_date |
|
432 tweet_ts_rel_milli = int(round(tweet_ts_rel.total_seconds() * 1000)) |
429 if deltas: |
433 if deltas: |
430 d = find_delta(deltas, tweet_ts_rel) |
434 d = find_delta(deltas, tweet_ts_rel_milli) |
431 if d[1] < 0: |
435 if d[1] < 0: |
432 continue |
436 continue |
433 else : |
437 else : |
434 tweet_ts_rel -= d[1] |
438 tweet_ts_rel_milli -= d[1] |
435 |
439 |
436 username = None |
440 username = None |
437 profile_url = "" |
441 profile_url = "" |
438 if tw.user is not None: |
442 if tw.user is not None: |
439 username = tw.user.screen_name |
443 username = tw.user.screen_name |
440 profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else "" |
444 profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else "" |
441 if not username: |
445 if not username: |
442 username = "anon." |
446 username = "anon." |
443 |
447 |
444 element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)}) |
448 element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel_milli), u"dur":u"0", u"src":unicode(profile_url)}) |
445 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text) |
449 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text) |
446 etree.SubElement(element, u"abstract").text = unicode(tw.text) |
450 etree.SubElement(element, u"abstract").text = unicode(tw.text) |
447 |
451 |
448 tags_node = etree.SubElement(element, u"tags") |
452 tags_node = etree.SubElement(element, u"tags") |
449 |
453 |