# script/rest/enmi_profile.py
# author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
# Tue, 26 Apr 2011 13:57:29 +0200
# changeset 122 4c3a15877f80
# parent 9 bb44692e09ee
# permissions -rw-r--r--
# clean php and python scripts

import twython
from sqlite3 import *
import datetime, time
import email.utils
from optparse import OptionParser
import os.path
import os
import sys
import simplejson


# Command-line options: --file, --rpp, --page, --total-page, --screenname,
# --user, --password, --new, --debug (start_date / end_date are not implemented).


 
def adapt_datetime(ts):
    """Convert a datetime into a POSIX timestamp suitable for SQLite storage.

    The datetime is reduced to a time tuple and handed to ``time.mktime``,
    which interprets it as local time; a time tuple carries no sub-second
    field, so microseconds are dropped.
    """
    time_tuple = ts.timetuple()
    seconds = time.mktime(time_tuple)
    return seconds
    
def adapt_geo(geo):
    """Serialize a geo value to its JSON text representation."""
    encoded = simplejson.dumps(geo)
    return encoded
	
def convert_geo(s):
    """Decode the JSON text *s* back into the geo value it represents."""
    decoded = simplejson.loads(s)
    return decoded


# Hook the helpers above into sqlite3:
#  - values read from columns declared with type "geo" go through convert_geo
#    (sqlite3 only applies converters on connections opened with detect_types);
#  - datetime.datetime parameters are bound through adapt_datetime.
register_converter("geo", convert_geo)
register_adapter(datetime.datetime, adapt_datetime)

# Column names used to create the tweet_tweet table.
columns_tweet = [
    u'favorited',
    u'truncated',
    u'text',
    u'created_at',
    u'source',
    u'in_reply_to_status_id',
    u'in_reply_to_screen_name',
    u'in_reply_to_user_id',
    u'geo',
    u'id',
    u'user',
]

# Column names used to create the tweet_user table.
columns_user = [
    u'id',
    u'verified',
    u'profile_sidebar_fill_color',
    u'profile_text_color',
    u'followers_count',
    u'protected',
    u'location',
    u'profile_background_color',
    u'utc_offset',
    u'statuses_count',
    u'description',
    u'friends_count',
    u'profile_link_color',
    u'profile_image_url',
    u'notifications',
    u'geo_enabled',
    u'profile_background_image_url',
    u'screen_name',
    u'profile_background_tile',
    u'favourites_count',
    u'name',
    u'url',
    u'created_at',
    u'time_zone',
    u'profile_sidebar_border_color',
    u'following',
]

def processDate(entry):
    """Parse ``entry["created_at"]`` and store it as ``entry["created_at_ts"]``.

    The date string is parsed together with its UTC offset, so the resulting
    local-time ``datetime`` designates the correct instant whatever the
    timezone of the machine running the script.  Strings that carry no offset
    are interpreted as local time.

    entry -- dict with a "created_at" date string; modified in place.

    Raises ValueError if the date string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(entry["created_at"])
    if parsed is None:
        raise ValueError("unparseable created_at value: %r" % (entry["created_at"],))
    # mktime_tz honours the parsed offset; plain parsedate + time.mktime would
    # treat the wall-clock fields as local time and shift the timestamp.
    epoch_seconds = email.utils.mktime_tz(parsed)
    entry["created_at_ts"] = datetime.datetime.fromtimestamp(epoch_seconds)

def _insert_known_columns(cursor, table, allowed_columns, row):
    """Insert into *table* the keys of *row* listed in *allowed_columns*; return lastrowid."""
    # Only schema column names ever reach the SQL text; values are bound as
    # named parameters.
    names = [name for name in allowed_columns if name in row]
    values = dict((name, row[name]) for name in names)
    cursor.execute(
        "insert into " + table + " (" + ",".join(names) + ") values (:" + ",:".join(names) + ");",
        values)
    return cursor.lastrowid


def processPage(page, cursor, debug):
    """Store every tweet of *page* that is not already in tweet_tweet.

    page   -- iterable of tweet dicts; each needs an "id" and a "user" dict,
              and both the tweet and its user need a "created_at" string.
    cursor -- sqlite3 cursor used for the duplicate check and the inserts.
    debug  -- when true, print the repr of every entry before handling it.

    For each new tweet a row is inserted into tweet_user, the tweet's "user"
    value is replaced by that row's rowid, a truthy "geo" value is serialized
    with adapt_geo, and the tweet is inserted into tweet_tweet.  Keys that are
    not columns of the target table are ignored.  The entries are modified in
    place; nothing is committed here.
    """
    for entry in page:
        if debug:
            print("ENTRY : " + repr(entry))

        # Skip tweets that are already stored.
        cursor.execute("select id from tweet_tweet where id = ?", (entry["id"],))
        if cursor.fetchone():
            continue

        entry_user = entry["user"]
        processDate(entry_user)
        user_rowid = _insert_known_columns(
            cursor, "tweet_user", columns_user + [u"created_at_ts"], entry_user)

        processDate(entry)
        # The tweet row references the freshly inserted user row by rowid.
        entry["user"] = user_rowid
        if entry.get("geo"):
            entry["geo"] = adapt_geo(entry["geo"])
        _insert_known_columns(
            cursor, "tweet_tweet", columns_tweet + [u"created_at_ts"], entry)


if __name__ == "__main__" :

    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",  
                      help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
    parser.add_option("-r", "--rpp", dest="rpp",
                      help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
    parser.add_option("-p", "--page", dest="page",
                      help="page result", metavar="PAGE", default=1, type='int')
    parser.add_option("-t", "--total-page", dest="total_page",
                      help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
    parser.add_option("-s", "--screenname", dest="screen_name",
                      help="Twitter screen name", metavar="SCREEN_NAME")
    parser.add_option("-u", "--user", dest="username",
                      help="Twitter user", metavar="USER", default=None)
    parser.add_option("-w", "--password", dest="password",
                      help="Twitter password", metavar="PASSWORD", default=None)
    parser.add_option("-n", "--new", dest="new", action="store_true",
                      help="new database", default=False)
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="debug", default=False)
    


    (options, args) = parser.parse_args()
    
    if options.debug:
    	print "OPTIONS : "
    	print repr(options)

    if options.screen_name is None:
        print "No Screen name. Exiting"
        sys.exit()
    
    if options.new and os.path.exists(options.filename):
        os.remove(options.filename)
    
    conn = connect(options.filename)
    conn.row_factory = Row
    curs = conn.cursor()

    curs.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")

    curs.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
    curs.execute("create index if not exists id_index on tweet_tweet (id asc);");
    
    curs.execute("select count(*) from tweet_tweet;")
    res = curs.fetchone()
    
    old_total = res[0]

    twitter = twython.setup(username=options.username, password=options.password, headers="IRI enmi (python urllib)")
    twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")

    search_results = None
    page = options.page-1

    while (page < options.total_page and ( search_results is None  or len(search_results) > 0)):
        page += 1
        try:
            search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
        except twython.TwythonError, (e):
            print "NAME : "+ options.screen_name + " ERROR : " + repr(e.msg)
            break
        print "NAME : "+ options.screen_name +" PAGE : " + repr(page) + " tweet: " + repr(len(search_results)) + " (total page : " + unicode(options.total_page) + " : rpp : "+unicode(options.rpp)+")"
        processPage(search_results, curs, options.debug)

    conn.commit() 

    curs.execute("select count(*) from tweet_tweet;")
    res = curs.fetchone()

    total = res[0]

    print "Tweet for " + options.screen_name + " : " + unicode(total - old_total) +", Tweet total : " + repr(total)

    conn.close()