script/iri_tweet/export_tweet_db.py
changeset 11 54d7f1486ac4
parent 9 bb44692e09ee
--- a/script/iri_tweet/export_tweet_db.py	Wed Jan 12 13:25:01 2011 +0100
+++ b/script/iri_tweet/export_tweet_db.py	Tue Jan 18 10:08:03 2011 +0100
@@ -1,7 +1,6 @@
 from models import *
 from utils import *
 from optparse import OptionParser
-from sqlalchemy import create_engine
 from sqlalchemy.orm import sessionmaker
 import logging
 import sqlite3
@@ -15,36 +14,18 @@
     
     parser = OptionParser()
 
-    parser.add_option("-l", "--log", dest="logfile",
-                      help="log to file", metavar="LOG", default="stderr")
-    parser.add_option("-v", dest="verbose", action="count",
-                      help="verbose", metavar="VERBOSE", default=0)
-    parser.add_option("-q", dest="quiet", action="count",
-                      help="quiet", metavar="QUIET", default=0)
+    set_logging_options(parser)
 
     return parser.parse_args()
 
 if __name__ == "__main__":
     
     (options, args) = get_option()
-    
-    logging_config = {}
-    
-    if options.logfile == "stdout":
-        logging_config["stream"] = sys.stdout
-    elif options.logfile == "stderr":
-        logging_config["stream"] = sys.stderr
-    else:
-        logging_config["filename"] = options.logfile
-
-    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
-    
-    logging.basicConfig(**logging_config)
+        
+    set_logging(options)
     
     with sqlite3.connect(args[0]) as conn_in:
-        engine = create_engine('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
-        metadata = Base.metadata
-        metadata.create_all(engine)
+        engine, metadata = setup_database('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
         Session = sessionmaker(bind=engine)
         session = Session()
         try:
@@ -52,20 +33,8 @@
             fields_mapping = {}
             for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
                 logging.debug("main loop %d : %s" % (i, res[0]))
-                json = eval(res[0])
-                if "metadata" in json:
-                    from_twitter_rest(json, res[0], session)
-                else:
-                    from_twitter_stream(json, res[0], session)       
-                #if "user_mentions" in json["entities"]:
-                #    for hash in json["entities"]["user_mentions"]:
-                ##        for key,value in hash.items():
-                #            if key not in fields_mapping or fields_mapping[key] is type(None):
-                #                fields_mapping[key] = type(value)
-            
-            
-            #for key,value in fields_mapping.items():
-            #    print key,value.__name__
+                processor = TwitterProcessor(eval(res[0]), res[0], session)
+                processor.process()
                 session.commit()
             logging.debug("main : %d tweet processed" % (i+1))
         except Exception, e: