script/iri_tweet/export_tweet_db.py
changeset 11 54d7f1486ac4
parent 9 bb44692e09ee
equal deleted inserted replaced
10:eb885a117aa0 11:54d7f1486ac4
     1 from models import *
     1 from models import *
     2 from utils import *
     2 from utils import *
     3 from optparse import OptionParser
     3 from optparse import OptionParser
     4 from sqlalchemy import create_engine
       
     5 from sqlalchemy.orm import sessionmaker
     4 from sqlalchemy.orm import sessionmaker
     6 import logging
     5 import logging
     7 import sqlite3
     6 import sqlite3
     8 import sys
     7 import sys
     9 
     8 
    13 
    12 
    14 def get_option():
    13 def get_option():
    15     
    14     
    16     parser = OptionParser()
    15     parser = OptionParser()
    17 
    16 
    18     parser.add_option("-l", "--log", dest="logfile",
    17     set_logging_options(parser)
    19                       help="log to file", metavar="LOG", default="stderr")
       
    20     parser.add_option("-v", dest="verbose", action="count",
       
    21                       help="verbose", metavar="VERBOSE", default=0)
       
    22     parser.add_option("-q", dest="quiet", action="count",
       
    23                       help="quiet", metavar="QUIET", default=0)
       
    24 
    18 
    25     return parser.parse_args()
    19     return parser.parse_args()
    26 
    20 
    27 if __name__ == "__main__":
    21 if __name__ == "__main__":
    28     
    22     
    29     (options, args) = get_option()
    23     (options, args) = get_option()
    30     
    24         
    31     logging_config = {}
    25     set_logging(options)
    32     
       
    33     if options.logfile == "stdout":
       
    34         logging_config["stream"] = sys.stdout
       
    35     elif options.logfile == "stderr":
       
    36         logging_config["stream"] = sys.stderr
       
    37     else:
       
    38         logging_config["filename"] = options.logfile
       
    39 
       
    40     logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
       
    41     
       
    42     logging.basicConfig(**logging_config)
       
    43     
    26     
    44     with sqlite3.connect(args[0]) as conn_in:
    27     with sqlite3.connect(args[0]) as conn_in:
    45         engine = create_engine('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
    28         engine, metadata = setup_database('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
    46         metadata = Base.metadata
       
    47         metadata.create_all(engine)
       
    48         Session = sessionmaker(bind=engine)
    29         Session = sessionmaker(bind=engine)
    49         session = Session()
    30         session = Session()
    50         try:
    31         try:
    51             curs_in = conn_in.cursor()
    32             curs_in = conn_in.cursor()
    52             fields_mapping = {}
    33             fields_mapping = {}
    53             for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
    34             for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
    54                 logging.debug("main loop %d : %s" % (i, res[0]))
    35                 logging.debug("main loop %d : %s" % (i, res[0]))
    55                 json = eval(res[0])
    36                 processor = TwitterProcessor(eval(res[0]), res[0], session)
    56                 if "metadata" in json:
    37                 processor.process()
    57                     from_twitter_rest(json, res[0], session)
       
    58                 else:
       
    59                     from_twitter_stream(json, res[0], session)       
       
    60                 #if "user_mentions" in json["entities"]:
       
    61                 #    for hash in json["entities"]["user_mentions"]:
       
    62                 ##        for key,value in hash.items():
       
    63                 #            if key not in fields_mapping or fields_mapping[key] is type(None):
       
    64                 #                fields_mapping[key] = type(value)
       
    65             
       
    66             
       
    67             #for key,value in fields_mapping.items():
       
    68             #    print key,value.__name__
       
    69                 session.commit()
    38                 session.commit()
    70             logging.debug("main : %d tweet processed" % (i+1))
    39             logging.debug("main : %d tweet processed" % (i+1))
    71         except Exception, e:
    40         except Exception, e:
    72             session.rollback()
    41             session.rollback()
    73             raise e
    42             raise e