script/iri_tweet/export_tweet_db.py
changeset 9 bb44692e09ee
child 11 54d7f1486ac4
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/iri_tweet/export_tweet_db.py	Tue Jan 11 11:17:17 2011 +0100
@@ -0,0 +1,78 @@
+from models import *
+from utils import *
+from optparse import OptionParser
+from sqlalchemy import create_engine
+from sqlalchemy.orm import sessionmaker
+import logging
+import sqlite3
+import sys
+
+
+#    'entities': "tweet_entity",     
+#    'user': "tweet_user"
+
def get_option():
    """Build the command-line parser and return the (options, args) pair.

    Options:
        -l/--log LOG  log destination: "stdout", "stderr" or a file path
        -v            repeatable; each occurrence increases verbosity
        -q            repeatable; each occurrence decreases verbosity
    """
    parser = OptionParser()
    parser.add_option("-l", "--log", dest="logfile", metavar="LOG",
                      default="stderr", help="log to file")
    parser.add_option("-v", dest="verbose", action="count", metavar="VERBOSE",
                      default=0, help="verbose")
    parser.add_option("-q", dest="quiet", action="count", metavar="QUIET",
                      default=0, help="quiet")
    return parser.parse_args()
+
if __name__ == "__main__":

    (options, args) = get_option()

    # Route log output based on -l: "stdout"/"stderr" select the stream,
    # anything else is treated as a file path.
    logging_config = {}
    if options.logfile == "stdout":
        logging_config["stream"] = sys.stdout
    elif options.logfile == "stderr":
        logging_config["stream"] = sys.stderr
    else:
        logging_config["filename"] = options.logfile

    # Each -v lowers the threshold one level, each -q raises it,
    # clamped to the [NOTSET, CRITICAL] range.
    logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))

    logging.basicConfig(**logging_config)

    # args[0]: source sqlite db with raw payloads in tweet_tweet.json
    # args[1]: destination sqlite db populated through the SQLAlchemy models
    with sqlite3.connect(args[0]) as conn_in:
        engine = create_engine('sqlite:///' + args[1], echo=((options.verbose - options.quiet) > 0))
        Base.metadata.create_all(engine)
        Session = sessionmaker(bind=engine)
        session = Session()
        tweet_count = 0  # keeps the summary log safe when the table is empty
        try:
            curs_in = conn_in.cursor()
            for i, res in enumerate(curs_in.execute("select json from tweet_tweet;")):
                logging.debug("main loop %d : %s", i, res[0])
                # SECURITY NOTE: eval() on a stored payload executes arbitrary
                # code if the source db is tampered with. Rows appear to be
                # Python reprs rather than strict JSON (otherwise json.loads
                # would be the safe drop-in) -- TODO confirm and migrate.
                json = eval(res[0])
                # REST API payloads carry a "metadata" key; streaming
                # payloads do not -- dispatch on that marker.
                if "metadata" in json:
                    from_twitter_rest(json, res[0], session)
                else:
                    from_twitter_stream(json, res[0], session)
                # Commit per row so a mid-run failure keeps the rows already
                # processed; the rollback below then only drops the bad row.
                session.commit()
                tweet_count = i + 1
            logging.debug("main : %d tweet processed", tweet_count)
        except Exception:
            session.rollback()
            # Log with traceback, then re-raise bare so the original
            # traceback is preserved (raise e would truncate it).
            logging.exception("main : error while processing tweets")
            raise
        finally:
            session.close()
\ No newline at end of file