tweet_live: comparison script/utils/export_twitter

Legend: equal (' ' prefix), deleted ('-' prefix), inserted ('+' prefix), replaced (paired '-' and '+' lines)

-:8a5ed4265209
+:02722ce55cf8
 import os.path
 import re
 import sys
 import time
 import uuid #@UnresolvedImport
-from dateutil.parser import parse as parse_date
+from dateutil.parser import parse as parse_date_raw
+from dateutil.tz import tzutc
 import bisect
 #class TweetExclude(object):
 #    def __init__(self, id):
 #        self.id = id
 #    def __repr__(self):
 #        return "<TweetExclude(id=%d)>" % (self.id)
 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/"
 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/"
+def parse_date(datestr):
+res = parse_date_raw(datestr)
+if res.tzinfo is None:
+res = res.replace(tzinfo=tzutc())
+return res
 def re_fn(expr, item):
 reg = re.compile(expr, re.I)
 res = reg.search(item)
 else:
 return None
 def get_options():
-usage = "usage: %(prog)s [options]"
+parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC")
-parser = argparse.ArgumentParser(usage)
 parser.add_argument("-f", "--file", dest="filename",
 help="write export to file", metavar="FILE", default="project.ldt")
 parser.add_argument("-d", "--database", dest="database",
 help="Input database", metavar="DATABASE")
 time_params = {
 'hours': int(parts[0]),
 'minutes': int(parts[1]),
 'seconds': int(parts[2]) if len(parts)>2 else 0
 }
-return int(datetime.timedelta(**time_params).total_seconds()*1000)
+return int(round(datetime.timedelta(**time_params).total_seconds()*1000))
 if __name__ == "__main__" :
 (options, parser) = get_options()
 start_date_str = params.get("start_date",None)
 end_date_str = params.get("end_date", None)
 duration = params.get("duration", None)
 content_file = params.get("content_file", None)
 content_file_write = params.get("content_file_write", None)
-hashtags = params.get('hashtags', [])
+hashtags = list(set(params.get('hashtags', [])))
 if user_whitelist_file:
 with open(user_whitelist_file, 'r+') as f:
 user_whitelist = list(set([s.strip() for s in f]))
 start_date = None
-ts = None
 if start_date_str:
 start_date = parse_date(start_date_str)
-ts = time.mktime(start_date.timetuple())
 root = None
 ensemble_parent = None
 #to do : analyse situation ldt or iri ? filename set or not ?
 query_res = query.all()
 for tw in query_res:
 tweet_ts_dt = tw.created_at
-tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+if tweet_ts_dt.tzinfo is None:
-if ts is None:
+tweet_ts_dt = tweet_ts_dt.replace(tzinfo=tzutc())
-ts = tweet_ts
+if start_date is None:
-tweet_ts_rel = (tweet_ts-ts) * 1000
+start_date = tweet_ts_dt
+tweet_ts_rel = tweet_ts_dt-start_date
+tweet_ts_rel_milli = int(round(tweet_ts_rel.total_seconds() * 1000))
 if deltas:
-d = find_delta(deltas, tweet_ts_rel)
+d = find_delta(deltas, tweet_ts_rel_milli)
 if d[1] < 0:
 continue
 else :
-tweet_ts_rel -= d[1]
+tweet_ts_rel_milli -= d[1]
 username = None
 profile_url = ""
 if tw.user is not None:
 username = tw.user.screen_name
 profile_url = tw.user.profile_image_url if tw.user.profile_image_url is not None else ""
 if not username:
 username = "anon."
-element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel), u"dur":u"0", u"src":unicode(profile_url)})
+element = etree.SubElement(elements, u"element" , {u"id":unicode(uuid.uuid4())+u"-"+unicode(tw.id), u"color":unicode(options.color), u"author":unicode(username), u"date":unicode(tweet_ts_dt.strftime("%Y/%m/%d")), u"begin": unicode(tweet_ts_rel_milli), u"dur":u"0", u"src":unicode(profile_url)})
 etree.SubElement(element, u"title").text = unicode(username) + u": " + unicode(tw.text)
 etree.SubElement(element, u"abstract").text = unicode(tw.text)
 tags_node = etree.SubElement(element, u"tags")

changeset 1153	02722ce55cf8
parent 1024	44636bcf3ea8
child 1295	03d2aa7b4967