script/rest/enmi_profile.py
changeset 9 bb44692e09ee
equal deleted inserted replaced
8:b7f4b0554ef8 9:bb44692e09ee
       
     1 import twython
       
     2 from sqlite3 import *
       
     3 import datetime, time
       
     4 import email.utils
       
     5 from optparse import OptionParser
       
     6 import os.path
       
     7 import os
       
     8 import sys
       
     9 import simplejson
       
    10 
       
    11 
       
    12 #options filename rpp page total_pages start_date end_date
       
    13 
       
    14 
       
    15  
       
    16 def adapt_datetime(ts):
       
    17     return time.mktime(ts.timetuple())
       
    18     
       
    19 def adapt_geo(geo):
       
    20 	return simplejson.dumps(geo)
       
    21 	
       
    22 def convert_geo(s):
       
    23 	return simplejson.loads(s)
       
    24 
       
    25 
       
    26 register_adapter(datetime.datetime, adapt_datetime)
       
    27 register_converter("geo", convert_geo)
       
    28 
       
    29 columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
       
    30 columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
       
    31 
       
    32 def processDate(entry):
       
    33     ts = email.utils.parsedate(entry["created_at"])
       
    34     entry["created_at_ts"] = datetime.datetime.fromtimestamp(time.mktime(ts))
       
    35 
       
    36 def processPage(page, cursor, debug):
       
    37     for entry in page:
       
    38         if debug:
       
    39             print "ENTRY : " + repr(entry)
       
    40         curs.execute("select id from tweet_tweet where id = ?", (entry["id"],))
       
    41         res = curs.fetchone()
       
    42         if res:
       
    43             continue
       
    44 
       
    45         entry_user = entry["user"]
       
    46         processDate(entry_user)
       
    47         cursor.execute("insert into tweet_user ("+",".join(entry_user.keys())+") values (:"+",:".join(entry_user.keys())+");", entry_user);
       
    48         new_id = cursor.lastrowid
       
    49         processDate(entry)
       
    50         entry["user"] = new_id
       
    51         if entry["geo"]:
       
    52             entry["geo"] = adapt_geo(entry["geo"])
       
    53         new_id = cursor.execute("insert into tweet_tweet ("+",".join(entry.keys())+") values (:"+",:".join(entry.keys())+");", entry);
       
    54 
       
    55 
       
    56 if __name__ == "__main__" :
       
    57 
       
    58     parser = OptionParser()
       
    59     parser.add_option("-f", "--file", dest="filename",  
       
    60                       help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
       
    61     parser.add_option("-r", "--rpp", dest="rpp",
       
    62                       help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
       
    63     parser.add_option("-p", "--page", dest="page",
       
    64                       help="page result", metavar="PAGE", default=1, type='int')
       
    65     parser.add_option("-t", "--total-page", dest="total_page",
       
    66                       help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
       
    67     parser.add_option("-s", "--screenname", dest="screen_name",
       
    68                       help="Twitter screen name", metavar="SCREEN_NAME")
       
    69     parser.add_option("-u", "--user", dest="username",
       
    70                       help="Twitter user", metavar="USER", default=None)
       
    71     parser.add_option("-w", "--password", dest="password",
       
    72                       help="Twitter password", metavar="PASSWORD", default=None)
       
    73     parser.add_option("-n", "--new", dest="new", action="store_true",
       
    74                       help="new database", default=False)
       
    75     parser.add_option("-d", "--debug", dest="debug", action="store_true",
       
    76                       help="debug", default=False)
       
    77     
       
    78 
       
    79 
       
    80     (options, args) = parser.parse_args()
       
    81     
       
    82     if options.debug:
       
    83     	print "OPTIONS : "
       
    84     	print repr(options)
       
    85 
       
    86     if options.screen_name is None:
       
    87         print "No Screen name. Exiting"
       
    88         sys.exit()
       
    89     
       
    90     if options.new and os.path.exists(options.filename):
       
    91         os.remove(options.filename)
       
    92     
       
    93     conn = connect(options.filename)
       
    94     conn.row_factory = Row
       
    95     curs = conn.cursor()
       
    96 
       
    97     curs.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")
       
    98 
       
    99     curs.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
       
   100     curs.execute("create index if not exists id_index on tweet_tweet (id asc);");
       
   101     
       
   102     curs.execute("select count(*) from tweet_tweet;")
       
   103     res = curs.fetchone()
       
   104     
       
   105     old_total = res[0]
       
   106 
       
   107     twitter = twython.setup(username=options.username, password=options.password, headers="IRI enmi (python urllib)")
       
   108     twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")
       
   109 
       
   110     search_results = None
       
   111     page = options.page-1
       
   112 
       
   113     while (page < options.total_page and ( search_results is None  or len(search_results) > 0)):
       
   114         page += 1
       
   115         try:
       
   116             search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
       
   117         except twython.TwythonError, (e):
       
   118             print "NAME : "+ options.screen_name + " ERROR : " + repr(e.msg)
       
   119             break
       
   120         print "NAME : "+ options.screen_name +" PAGE : " + repr(page) + " tweet: " + repr(len(search_results)) + " (total page : " + unicode(options.total_page) + " : rpp : "+unicode(options.rpp)+")"
       
   121         processPage(search_results, curs, options.debug)
       
   122 
       
   123     conn.commit() 
       
   124 
       
   125     curs.execute("select count(*) from tweet_tweet;")
       
   126     res = curs.fetchone()
       
   127 
       
   128     total = res[0]
       
   129 
       
   130     print "Tweet for " + options.screen_name + " : " + unicode(total - old_total) +", Tweet total : " + repr(total)
       
   131 
       
   132     conn.close()
       
   133 
       
   134