# Recovered from the diff of changeset bb44692e09ee (parent b7f4b0554ef8),
# which added this file as script/rest/enmi_profile.py
# (Tue Jan 11 11:17:17 2011 +0100).
"""Archive a Twitter user's timeline into a local SQLite database.

The script pages through a user's timeline with twython and stores every
tweet (table ``tweet_tweet``) together with a snapshot of its author (table
``tweet_user``).  Tweets whose ``id`` is already present are skipped, so the
script can be re-run against an existing database.

The source is written to run on Python 2.6+ as well as Python 3.
"""

# twython is only needed to talk to Twitter; keeping the import optional lets
# the storage helpers below be imported (and tested) without it.
try:
    import twython
except ImportError:
    twython = None
from sqlite3 import *
import datetime, time
import email.utils
from optparse import OptionParser
import os.path
import os
import sys
# simplejson is the historical backport of the stdlib ``json`` module; fall
# back to the stdlib implementation when the backport is not installed.
try:
    import simplejson
except ImportError:
    import json as simplejson


# options: filename rpp page total_pages start_date end_date


def adapt_datetime(ts):
    """Convert a naive local ``datetime`` to a POSIX timestamp for SQLite."""
    return time.mktime(ts.timetuple())


def adapt_geo(geo):
    """Serialize a ``geo`` structure to a JSON string."""
    return simplejson.dumps(geo)


def convert_geo(s):
    """Deserialize a JSON string produced by :func:`adapt_geo`."""
    return simplejson.loads(s)


register_adapter(datetime.datetime, adapt_datetime)
# NOTE(review): sqlite3 converters are keyed on declared column types and only
# run when the connection enables type detection; main() does not enable it,
# so this converter looks unused at the moment -- confirm before relying on it.
register_converter("geo", convert_geo)

columns_tweet = [
    u'favorited', u'truncated', u'text', u'created_at', u'source',
    u'in_reply_to_status_id', u'in_reply_to_screen_name',
    u'in_reply_to_user_id', u'geo', u'id', u'user',
]
columns_user = [
    u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color',
    u'followers_count', u'protected', u'location',
    u'profile_background_color', u'utc_offset', u'statuses_count',
    u'description', u'friends_count', u'profile_link_color',
    u'profile_image_url', u'notifications', u'geo_enabled',
    u'profile_background_image_url', u'screen_name',
    u'profile_background_tile', u'favourites_count', u'name', u'url',
    u'created_at', u'time_zone', u'profile_sidebar_border_color',
    u'following',
]


def processDate(entry):
    """Add a ``created_at_ts`` datetime to *entry*, parsed from ``created_at``.

    The UTC offset contained in the date string is honoured, so the value
    written to the database by :func:`adapt_datetime` is the real POSIX
    timestamp regardless of the time zone of the host.  When ``created_at``
    is missing or cannot be parsed, ``created_at_ts`` is set to ``None``.

    *entry* is modified in place.
    """
    raw = entry.get("created_at")
    parsed = email.utils.parsedate_tz(raw) if raw else None
    if parsed is None:
        entry["created_at_ts"] = None
        return
    # mktime_tz applies the parsed UTC offset; fromtimestamp then yields the
    # naive *local* datetime that adapt_datetime converts back to that epoch.
    entry["created_at_ts"] = datetime.datetime.fromtimestamp(
        email.utils.mktime_tz(parsed))


def _insert_row(cursor, table, columns, record):
    """Insert the values of *record* that belong to *columns* into *table*.

    Only names listed in *columns* ever reach the SQL text; keys of *record*
    that are not table columns are ignored instead of breaking the statement.
    """
    names = [name for name in columns if name in record]
    sql = "insert into %s (%s) values (%s);" % (
        table,
        ",".join(names),
        ",".join(":" + name for name in names),
    )
    cursor.execute(sql, dict((name, record[name]) for name in names))


def processPage(page, cursor, debug):
    """Store every tweet of *page* that is not yet in the database.

    page   -- iterable of tweet dictionaries, each with at least the keys
              ``id`` and ``user`` (the author, itself a dictionary).
    cursor -- sqlite3 cursor on a database holding the ``tweet_user`` and
              ``tweet_tweet`` tables.
    debug  -- when true, print each entry before processing it.

    The entries are modified in place: ``created_at_ts`` is added, ``user``
    is replaced by the rowid of the inserted user row and a non-empty ``geo``
    is replaced by its JSON representation.
    """
    for entry in page:
        if debug:
            print("ENTRY : " + repr(entry))

        # Use the cursor we were given, not a module-level global.
        cursor.execute("select id from tweet_tweet where id = ?", (entry["id"],))
        if cursor.fetchone():
            continue

        # NOTE(review): one user row is written per stored tweet, so the same
        # account appears several times in tweet_user -- confirm this
        # per-tweet snapshot is intended.
        entry_user = entry["user"]
        processDate(entry_user)
        _insert_row(cursor, "tweet_user", columns_user + ["created_at_ts"], entry_user)
        user_rowid = cursor.lastrowid

        processDate(entry)
        entry["user"] = user_rowid
        if entry.get("geo"):
            entry["geo"] = adapt_geo(entry["geo"])
        _insert_row(cursor, "tweet_tweet", columns_tweet + ["created_at_ts"], entry)


def _build_option_parser():
    """Return the command-line option parser of the script."""
    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",
                      help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
    parser.add_option("-r", "--rpp", dest="rpp",
                      help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
    parser.add_option("-p", "--page", dest="page",
                      help="page result", metavar="PAGE", default=1, type='int')
    parser.add_option("-t", "--total-page", dest="total_page",
                      help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
    parser.add_option("-s", "--screenname", dest="screen_name",
                      help="Twitter screen name", metavar="SCREEN_NAME")
    parser.add_option("-u", "--user", dest="username",
                      help="Twitter user", metavar="USER", default=None)
    parser.add_option("-w", "--password", dest="password",
                      help="Twitter password", metavar="PASSWORD", default=None)
    parser.add_option("-n", "--new", dest="new", action="store_true",
                      help="new database", default=False)
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="debug", default=False)
    return parser


def _create_schema(cursor):
    """Create the two tables and the tweet id index when they are missing."""
    cursor.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")
    cursor.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
    cursor.execute("create index if not exists id_index on tweet_tweet (id asc);")


def _count_tweets(cursor):
    """Return the number of rows currently stored in ``tweet_tweet``."""
    cursor.execute("select count(*) from tweet_tweet;")
    return cursor.fetchone()[0]


def main(argv=None):
    """Run the command-line tool; *argv* defaults to ``sys.argv[1:]``."""
    parser = _build_option_parser()
    (options, args) = parser.parse_args(argv)

    if options.debug:
        print("OPTIONS : ")
        print(repr(options))

    if options.screen_name is None:
        print("No Screen name. Exiting")
        # A missing mandatory option is an error: report it in the status.
        sys.exit(1)

    if twython is None:
        sys.exit("The twython package is required to query Twitter.")

    if options.new and os.path.exists(options.filename):
        os.remove(options.filename)

    conn = connect(options.filename)
    try:
        conn.row_factory = Row
        curs = conn.cursor()

        _create_schema(curs)
        old_total = _count_tweets(curs)

        # NOTE(review): the object returned by twython.setup() is discarded
        # right away, so --user/--password have no visible effect on the
        # client used below.  Both calls are kept as they were -- confirm
        # which client is intended.
        twitter = twython.setup(username=options.username, password=options.password, headers="IRI enmi (python urllib)")
        # NOTE(review): application credentials are hard-coded in the source;
        # they should come from configuration and be rotated.
        twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")

        search_results = None
        page = options.page - 1

        # Stop at the requested page count or at the first empty page.
        while page < options.total_page and (search_results is None or len(search_results) > 0):
            page += 1
            try:
                search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
            except twython.TwythonError as e:
                print("NAME : " + options.screen_name + " ERROR : " + repr(e.msg))
                break
            print("NAME : " + options.screen_name + " PAGE : " + repr(page) + " tweet: " + repr(len(search_results)) + " (total page : " + str(options.total_page) + " : rpp : " + str(options.rpp) + ")")
            processPage(search_results, curs, options.debug)

        conn.commit()

        total = _count_tweets(curs)
        print("Tweet for " + options.screen_name + " : " + str(total - old_total) + ", Tweet total : " + repr(total))
    finally:
        # Release the database even when fetching or storing fails.
        conn.close()


if __name__ == "__main__":
    main()