script/iri_tweet/export_tweet_db.py
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Tue, 11 Jan 2011 11:17:17 +0100
changeset 9 bb44692e09ee
child 11 54d7f1486ac4
permissions -rw-r--r--
script apres traitement enmi

from models import *
from utils import *
from optparse import OptionParser
from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import logging
import sqlite3
import sys


#    'entities': "tweet_entity",     
#    'user': "tweet_user"

def get_option():
    """Parse the process command line.

    Declares three options:
      -l / --log : log destination, stored as ``logfile`` (default "stderr")
      -v         : repeatable counter stored as ``verbose`` (default 0)
      -q         : repeatable counter stored as ``quiet`` (default 0)

    Returns the ``(options, args)`` pair produced by
    ``OptionParser.parse_args()``; ``args`` holds the remaining positional
    arguments.
    """
    cli = OptionParser()

    cli.add_option(
        "-l", "--log",
        dest="logfile",
        default="stderr",
        metavar="LOG",
        help="log to file",
    )

    # The two verbosity flags differ only in their names, so declare them
    # from a small table: (flag, destination / help text, metavar).
    counter_flags = (
        ("-v", "verbose", "VERBOSE"),
        ("-q", "quiet", "QUIET"),
    )
    for flag, name, placeholder in counter_flags:
        cli.add_option(
            flag,
            dest=name,
            action="count",
            default=0,
            metavar=placeholder,
            help=name,
        )

    parsed = cli.parse_args()
    return parsed

if __name__ == "__main__":
    # Script entry point.
    #
    # Reads every row of the ``tweet_tweet`` table from the sqlite database
    # given as first positional argument, and hands each stored tweet to
    # from_twitter_rest() or from_twitter_stream() together with a SQLAlchemy
    # session bound to the sqlite database given as second positional
    # argument.

    (options, args) = get_option()

    # Both database paths are mandatory; fail with a readable message rather
    # than an IndexError further down.
    if len(args) < 2:
        sys.exit("usage: %s [-l LOG] [-v] [-q] INPUT_DB OUTPUT_DB" % sys.argv[0])

    logging_config = {}

    # "stdout" and "stderr" select the matching stream; any other value is
    # used as a log file name.
    if options.logfile == "stdout":
        logging_config["stream"] = sys.stdout
    elif options.logfile == "stderr":
        logging_config["stream"] = sys.stderr
    else:
        logging_config["filename"] = options.logfile

    # Start from WARNING; each -v lowers the threshold by 10 and each -q
    # raises it by 10, clamped to the [NOTSET, CRITICAL] range.
    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))

    logging.basicConfig(**logging_config)

    with sqlite3.connect(args[0]) as conn_in:
        # SQL echo is switched on as soon as -v outnumbers -q.
        engine = create_engine('sqlite:///' + args[1], echo=((options.verbose - options.quiet) > 0))
        # Base is not defined in this file -- presumably the declarative base
        # exported by ``models`` (TODO confirm). Creates the mapped tables in
        # the output database.
        metadata = Base.metadata
        metadata.create_all(engine)
        Session = sessionmaker(bind=engine)
        session = Session()
        try:
            curs_in = conn_in.cursor()
            # Kept outside the loop so the final log line is valid even when
            # the input table is empty.
            processed = 0
            for i, res in enumerate(curs_in.execute("select json from tweet_tweet;")):
                logging.debug("main loop %d : %s", i, res[0])
                # SECURITY: eval() executes whatever text is stored in the
                # input database. Only run this script on databases you
                # trust. NOTE(review): if the column only ever holds Python
                # literals, ast.literal_eval would be a safer replacement --
                # confirm the stored format before switching.
                tweet = eval(res[0])
                # A top-level "metadata" key selects the REST handler; every
                # other tweet goes to the stream handler.
                if "metadata" in tweet:
                    from_twitter_rest(tweet, res[0], session)
                else:
                    from_twitter_stream(tweet, res[0], session)
                # Commit after every tweet, so tweets handled before a
                # failure stay committed.
                session.commit()
                processed = i + 1
            logging.debug("main : %d tweet processed", processed)
        except Exception:
            session.rollback()
            # Bare raise keeps the original traceback.
            raise
        finally:
            session.close()