"""Replay the tweets of a raw SQLite dump into the SQLAlchemy model database.

Usage: export_tweet_db.py [options] <input db> <output db>

Each value of the ``json`` column of the ``tweet_tweet`` table in the input
database is evaluated and handed to ``from_twitter_rest`` (when it has a
"metadata" key) or to ``from_twitter_stream`` (otherwise), together with a
session bound to the output database.

Originally added as script/iri_tweet/export_tweet_db.py in changeset
bb44692e09ee (parent b7f4b0554ef8, Tue Jan 11 11:17:17 2011 +0100).
"""
# NOTE(review): Base, from_twitter_rest and from_twitter_stream are not defined
# in this file; presumably they are provided by the two star imports below.
from models import *
from utils import *
from contextlib import closing
from optparse import OptionParser
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import logging
import sqlite3
import sys


#    'entities': "tweet_entity",
#    'user': "tweet_user"


def get_option():
    """Parse the command line.

    Returns:
        The ``(options, args)`` pair produced by ``OptionParser.parse_args``.
        ``options`` carries ``logfile`` (default "stderr") and the ``verbose``
        and ``quiet`` counters (default 0); ``args`` holds the positional
        arguments.
    """
    parser = OptionParser()

    parser.add_option("-l", "--log", dest="logfile",
                      help="log to file", metavar="LOG", default="stderr")
    parser.add_option("-v", dest="verbose", action="count",
                      help="verbose", metavar="VERBOSE", default=0)
    parser.add_option("-q", dest="quiet", action="count",
                      help="quiet", metavar="QUIET", default=0)

    return parser.parse_args()


def _logging_config(options):
    """Build the keyword arguments for ``logging.basicConfig`` from *options*."""
    config = {}

    if options.logfile == "stdout":
        config["stream"] = sys.stdout
    elif options.logfile == "stderr":
        config["stream"] = sys.stderr
    else:
        config["filename"] = options.logfile

    # Start at WARNING; each -v lowers the threshold by one level (10), each
    # -q raises it by one, clamped to the [NOTSET, CRITICAL] range.
    level = logging.WARNING - 10 * options.verbose + 10 * options.quiet
    config["level"] = max(logging.NOTSET, min(logging.CRITICAL, level))

    return config


def _import_tweets(conn_in, session):
    """Feed every row of ``tweet_tweet`` to the importer; return the row count."""
    processed = 0
    cursor = conn_in.cursor()

    for row in cursor.execute("select json from tweet_tweet;"):
        raw = row[0]
        logging.debug("main loop %d : %s", processed, raw)

        # SECURITY: eval() executes whatever text is stored in the input
        # database. It is kept because the stored format cannot be confirmed
        # from this file. TODO: switch to ast.literal_eval or json.loads once
        # the format is verified; only run this against trusted dumps.
        tweet = eval(raw)

        if "metadata" in tweet:
            from_twitter_rest(tweet, raw, session)
        else:
            from_twitter_stream(tweet, raw, session)

        processed += 1

    return processed


def main():
    """Script entry point: configure logging, then copy the tweets across."""
    (options, args) = get_option()

    if len(args) < 2:
        # Fail with a readable message instead of an IndexError further down.
        sys.exit("usage: export_tweet_db.py [options] <input db> <output db>")

    logging.basicConfig(**_logging_config(options))

    # sqlite3's own context manager only scopes a transaction; closing()
    # makes sure the read-only input connection is actually released.
    with closing(sqlite3.connect(args[0])) as conn_in:
        engine = create_engine('sqlite:///' + args[1],
                               echo=((options.verbose - options.quiet) > 0))
        Base.metadata.create_all(engine)
        session = sessionmaker(bind=engine)()
        try:
            processed = _import_tweets(conn_in, session)
            session.commit()
            logging.debug("main : %d tweet processed", processed)
        except Exception:
            session.rollback()
            # Bare raise keeps the original traceback.
            raise
        finally:
            session.close()


if __name__ == "__main__":
    main()