--- a/script/iri_tweet/export_tweet_db.py Wed Jan 12 13:25:01 2011 +0100
+++ b/script/iri_tweet/export_tweet_db.py Tue Jan 18 10:08:03 2011 +0100
@@ -1,7 +1,6 @@
from models import *
from utils import *
from optparse import OptionParser
-from sqlalchemy import create_engine
from sqlalchemy.orm import sessionmaker
import logging
import sqlite3
@@ -15,36 +14,18 @@
parser = OptionParser()
- parser.add_option("-l", "--log", dest="logfile",
- help="log to file", metavar="LOG", default="stderr")
- parser.add_option("-v", dest="verbose", action="count",
- help="verbose", metavar="VERBOSE", default=0)
- parser.add_option("-q", dest="quiet", action="count",
- help="quiet", metavar="QUIET", default=0)
+ set_logging_options(parser)
return parser.parse_args()
if __name__ == "__main__":
(options, args) = get_option()
-
- logging_config = {}
-
- if options.logfile == "stdout":
- logging_config["stream"] = sys.stdout
- elif options.logfile == "stderr":
- logging_config["stream"] = sys.stderr
- else:
- logging_config["filename"] = options.logfile
-
- logging_config["level"] = max(logging.NOTSET, min(logging.CRITICAL, logging.WARNING - 10 * options.verbose + 10 * options.quiet))
-
- logging.basicConfig(**logging_config)
+
+ set_logging(options)
with sqlite3.connect(args[0]) as conn_in:
- engine = create_engine('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
- metadata = Base.metadata
- metadata.create_all(engine)
+ engine, metadata = setup_database('sqlite:///'+args[1], echo=((options.verbose-options.quiet)>0))
Session = sessionmaker(bind=engine)
session = Session()
try:
@@ -52,20 +33,8 @@
fields_mapping = {}
for i,res in enumerate(curs_in.execute("select json from tweet_tweet;")):
logging.debug("main loop %d : %s" % (i, res[0]))
- json = eval(res[0])
- if "metadata" in json:
- from_twitter_rest(json, res[0], session)
- else:
- from_twitter_stream(json, res[0], session)
- #if "user_mentions" in json["entities"]:
- # for hash in json["entities"]["user_mentions"]:
- ## for key,value in hash.items():
- # if key not in fields_mapping or fields_mapping[key] is type(None):
- # fields_mapping[key] = type(value)
-
-
- #for key,value in fields_mapping.items():
- # print key,value.__name__
+ processor = TwitterProcessor(eval(res[0]), res[0], session)
+ processor.process()
session.commit()
logging.debug("main : %d tweet processed" % (i+1))
except Exception, e: