script/rest/search_twitter.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Thu, 22 Sep 2011 12:37:53 +0200
changeset 289 a5eff8f2b81d
parent 255 500cd0405c7a
child 411 0471e6eb8a1b
permissions -rw-r--r--
improve session maker creation + models version + add model version in db

import re
import sqlite3
from optparse import OptionParser

import anyjson
import twitter
from sqlalchemy.orm import sessionmaker

from iri_tweet import models, utils


def get_option():
    """Declare the script's command-line options and parse the arguments.

    Returns the ``(options, args)`` pair produced by
    ``OptionParser.parse_args()``: ``options`` carries the values of the
    flags declared below, ``args`` the remaining positional arguments.
    """
    # One (flags, settings) entry per option, in the order they are registered.
    option_specs = (
        (("-l", "--log"),
         dict(dest="logfile", help="log to file", metavar="LOG",
              default="stderr")),
        (("-v",),
         dict(dest="verbose", action="count", help="verbose",
              metavar="VERBOSE", default=0)),
        (("-q",),
         dict(dest="quiet", action="count", help="quiet",
              metavar="QUIET", default=0)),
        (("-r", "--request"),
         dict(dest="request", help="twitter request", metavar="REQUEST",
              default=0)),
        (("-Q",),
         dict(dest="query", help="query", metavar="QUERY")),
        (("-P",),
         dict(dest="rpp", metavar="RPP", default="50",
              help="Result per page")),
        (("-t",),
         dict(dest="token_filename", metavar="TOKEN_FILENAME",
              default=".oauth_token", help="Token file name")),
    )

    cli_parser = OptionParser()
    for flags, settings in option_specs:
        cli_parser.add_option(*flags, **settings)

    # TODO (original author's note): add request token

    return cli_parser.parse_args()

def main():
    """Search Twitter for the ``-Q`` query and store the tweets in a database.

    The first positional command-line argument selects the database: either
    a full connection URL (``scheme://...``) or a bare path, which is turned
    into an ``sqlite:///`` URL. Result pages are requested one at a time;
    every tweet is printed, passed to ``utils.TwitterProcessor`` and
    committed on its own.

    Raises:
        SystemExit: if the database argument is missing.
    """
    (options, args) = get_option()

    if not args:
        # Exit with a readable message instead of an IndexError on args[0].
        raise SystemExit("missing argument: database connection string or sqlite file path")

    conn_str = args[0].strip()
    # Anything without a "scheme://" prefix is taken to be an SQLite file path.
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    # Last page that keeps page * rpp within the hard-coded 1500-result cap;
    # computed once rather than on every loop iteration.
    max_page = 1500 // int(options.rpp)

    # Bound to its own name so the imported ``twitter`` module is not rebound.
    search_client = twitter.Twitter(domain="search.twitter.com")

    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
    session = None
    try:
        session = Session()

        tweets = None
        page = 1

        # Stop at the page cap, or as soon as a page comes back with no
        # tweets. The emptiness test is on the "results" list, not on the
        # response dict that wraps it.
        while page <= max_page and (tweets is None or len(tweets) > 0):
            results = search_client.search(q=options.query, rpp=options.rpp, page=page)
            tweets = results["results"]
            for tweet in tweets:
                print(tweet)
                tweet_str = anyjson.serialize(tweet)
                # NOTE(review): the original comment here read "invalidate
                # user id"; presumably it refers to the None arguments below
                # - confirm against utils.TwitterProcessor.
                processor = utils.TwitterProcessor(tweet, tweet_str, None, session, None, options.token_filename)
                processor.process()
                # Commit per tweet so earlier tweets stay stored if a later
                # one fails.
                session.flush()
                session.commit()
            page += 1
    finally:
        if session is not None:
            session.close()


if __name__ == "__main__":
    main()