script/rest/search_twitter.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Wed, 27 Jul 2011 00:04:55 +0200
changeset 242 cdd7d3c0549c
parent 122 4c3a15877f80
child 255 500cd0405c7a
permissions -rw-r--r--
Starting 'parallel_twitter' branch

from iri_tweet import models, utils
from sqlalchemy.orm import sessionmaker
import anyjson
import sqlite3
import twitter
from optparse import OptionParser


def get_option():
    """Parse the script's command-line arguments.

    The options are declared in a table and registered in order, so the
    generated help text lists them in the same order as they appear here.

    Returns:
        The ``(options, args)`` pair produced by ``OptionParser.parse_args()``.
    """
    option_specs = (
        (("-l", "--log"),
         dict(dest="logfile", help="log to file", metavar="LOG",
              default="stderr")),
        # -v and -q are counters: each repetition adds one to the value.
        (("-v",),
         dict(dest="verbose", action="count", help="verbose",
              metavar="VERBOSE", default=0)),
        (("-q",),
         dict(dest="quiet", action="count", help="quiet",
              metavar="QUIET", default=0)),
        (("-r", "--request"),
         dict(dest="request", help="twitter request", metavar="REQUEST",
              default=0)),
        (("-Q",),
         dict(dest="query", help="query", metavar="QUERY")),
        # The value is kept as a string; no type conversion is done here.
        (("-P",),
         dict(dest="rpp", metavar="RPP", default="50",
              help="Result per page")),
        (("-t",),
         dict(dest="token_filename", metavar="TOKEN_FILENAME",
              default=".oauth_token", help="Token file name")),
    )

    # TODO (from the original author's notes): add a request token option.

    cli = OptionParser()
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)

    return cli.parse_args()

if __name__ == "__main__":
    # Script entry point: run a Twitter search page by page and hand every
    # returned tweet to utils.TwitterProcessor, committing after each tweet.
    # The first positional argument is the path of the SQLite database file.

    (options, args) = get_option()

    # Fail with a readable message instead of an IndexError on args[0].
    if not args:
        raise SystemExit("usage: search_twitter.py [options] DATABASE_FILE")

    # Use a distinct name so the imported `twitter` module is not rebound.
    search_api = twitter.Twitter(domain="search.twitter.com")

    # SQL echo is enabled when -v was given more often than -q.
    echo_sql = (options.verbose - options.quiet) > 0
    engine, metadata = models.setup_database('sqlite:///' + args[0], echo=echo_sql)
    Session = sessionmaker(bind=engine)
    session = Session()
    try:
        # Never ask for more than 1500 results in total (presumably the
        # search API's own cap -- confirm); this bounds the page count.
        last_page = 1500 // int(options.rpp)

        page = 1
        while page <= last_page:
            results = search_api.search(q=options.query, rpp=options.rpp, page=page)

            # The tweets live under the "results" key. Stop on the first
            # empty page: testing len() of the response object itself would
            # not detect an empty tweet list, and the loop would keep
            # requesting pages up to last_page.
            tweets = results["results"]
            if not tweets:
                break

            for tweet in tweets:
                # Parenthesised single-argument print behaves the same under
                # the Python 2 print statement.
                print(tweet)
                tweet_str = anyjson.serialize(tweet)
                # None is passed as the third argument; the original note
                # here reads "invalidate user id".
                processor = utils.TwitterProcessor(tweet, tweet_str, None, session, options.token_filename)
                processor.process()
                # Persist each tweet individually so earlier tweets are kept
                # if a later one fails.
                session.flush()
                session.commit()
            page += 1
    finally:
        session.close()