script/rest/search_twitter.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Fri, 10 May 2013 14:46:57 +0200
changeset 895 aa7ad01b8260
parent 891 8628c590f608
child 914 43876221071f
permissions -rw-r--r--
clean hg ignore

from iri_tweet import models, processor
from optparse import OptionParser
import anyjson
import re
import twitter


def get_option(argv=None):
    """Parse the command-line options of the search script.

    Recognised options:

    * ``-l`` / ``--log``: log file name, stored as ``logfile``
      (default ``"stderr"``).
    * ``-v`` / ``-q``: repeatable counters stored as ``verbose`` and
      ``quiet`` (both default to 0).
    * ``-Q``: the search query, stored as ``query`` (no default, so it is
      ``None`` when omitted).
    * ``-P``: results per page, stored as the *string* ``rpp``
      (default ``"50"``); callers convert it to an integer themselves.
    * ``-t``: token file name, stored as ``token_filename``
      (default ``".oauth_token"``).

    :param argv: optional list of argument strings to parse. When ``None``
        (the default) ``optparse`` falls back to ``sys.argv[1:]``, which is
        what the script did before this parameter existed.
    :returns: the ``(options, args)`` pair produced by
        ``OptionParser.parse_args``; ``args`` holds the remaining
        positional arguments.
    """
    parser = OptionParser()

    parser.add_option("-l", "--log", dest="logfile",
                      help="log to file", metavar="LOG", default="stderr")
    parser.add_option("-v", dest="verbose", action="count",
                      help="verbose", metavar="VERBOSE", default=0)
    parser.add_option("-q", dest="quiet", action="count",
                      help="quiet", metavar="QUIET", default=0)
    parser.add_option("-Q", dest="query",
                      help="query", metavar="QUERY")
    parser.add_option("-P", dest="rpp", metavar="RPP", default="50",
                      help="Result per page")
    parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                      help="Token file name")

    # Passing None keeps the historical behaviour of reading sys.argv[1:].
    return parser.parse_args(argv)

# Total number of results the script pages through for one query. The
# original code hard-coded this value; presumably it mirrors a cap of the
# Twitter search API -- TODO confirm.
MAX_SEARCH_RESULTS = 1500


def main():
    """Run a Twitter search and store every returned tweet in the database.

    The first positional argument is either a full connection URL
    (``scheme://...``) or a bare path, which is turned into a
    ``sqlite:///`` URL. Result pages are requested one after another until
    a page comes back empty or ``MAX_SEARCH_RESULTS`` results have been
    requested. Each tweet is printed, handed to
    ``processor.TwitterProcessorStatus`` and committed individually.

    :raises SystemExit: when the database argument is missing or ``-P`` is
        not a strictly positive integer.
    """
    (options, args) = get_option()

    # Fail with a readable message instead of an IndexError on args[0].
    if not args:
        raise SystemExit("usage: search_twitter.py [options] <database path or connection url>")

    # Validate the page size once, up front: a zero or non-numeric value
    # would otherwise surface as ZeroDivisionError / ValueError mid-run.
    try:
        rpp = int(options.rpp)
    except ValueError:
        raise SystemExit("-P expects an integer, got %r" % (options.rpp,))
    if rpp <= 0:
        raise SystemExit("-P expects a strictly positive integer, got %d" % rpp)

    t = twitter.Twitter(domain="search.twitter.com")
    # NOTE(review): presumably makes the client use plain HTTP -- confirm
    # against the twitter library in use.
    t.secure = False

    conn_str = args[0].strip()
    # Anything that does not look like "scheme://..." is treated as a path
    # to a SQLite database file.
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    # SQL echo is enabled when -v was given more often than -q.
    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose - options.quiet) > 0), create_all=True)
    session = None
    try:
        session = Session()

        # Parenthesised single-argument print behaves the same on Python 2
        # and is also valid Python 3.
        print(options.query)

        last_page = MAX_SEARCH_RESULTS // rpp
        for page in range(1, last_page + 1):
            results = t.search(q=options.query, rpp=rpp, page=page, include_entities=True)

            # Stop on the first empty page. The tweets live under the
            # "results" key; measuring the response object itself (as the
            # original loop condition did) never reports an empty page.
            tweets = results["results"]
            if not tweets:
                break

            for tweet in tweets:
                print(tweet)
                tweet_str = anyjson.serialize(tweet)
                # Use a distinct local name: rebinding `processor` would
                # replace the imported module and break the next iteration.
                status_processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None)
                status_processor.process()
                # Commit per tweet so already-stored tweets survive a
                # failure on a later one.
                session.flush()
                session.commit()
    finally:
        if session is not None:
            session.close()


if __name__ == "__main__":
    main()