script/lib/iri_tweet/export_twitter_alchemy.py
changeset 289 a5eff8f2b81d
parent 275 483cc4e35193
child 379 083320a74eb2
--- a/script/lib/iri_tweet/export_twitter_alchemy.py	Tue Sep 20 16:55:16 2011 +0200
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py	Thu Sep 22 12:37:53 2011 +0200
@@ -2,19 +2,19 @@
 # coding=utf-8
 
 from lxml import etree
+from models import setup_database
 from optparse import OptionParser #@UnresolvedImport
-from sqlalchemy import Table, Column, BigInteger, MetaData
-from sqlalchemy.orm import sessionmaker
-from utils import parse_date, set_logging_options, set_logging, get_filter_query, get_logger
-from models import setup_database
+from sqlalchemy import Table, Column, BigInteger
+from utils import (parse_date, set_logging_options, set_logging, get_filter_query, 
+    get_logger)
+import anyjson
 import datetime
+import httplib2
 import os.path
 import re
 import sys
 import time
 import uuid #@UnresolvedImport
-import httplib2
-import anyjson
 
 #class TweetExclude(object):
 #    def __init__(self, id):
@@ -57,9 +57,9 @@
     parser.add_option("-d", "--database", dest="database",
                       help="Input database", metavar="DATABASE")
     parser.add_option("-s", "--start-date", dest="start_date",
-                      help="start date", metavar="START_DATE")
+                      help="start date", metavar="START_DATE", default=None)
     parser.add_option("-e", "--end-date", dest="end_date",
-                      help="end date", metavar="END_DATE")
+                      help="end date", metavar="END_DATE", default=None)
     parser.add_option("-I", "--content-file", dest="content_file",
                       help="Content file", metavar="CONTENT_FILE")
     parser.add_option("-c", "--content", dest="content",
@@ -110,18 +110,16 @@
     if not re.match("^\w+://.+", conn_str):
         conn_str = 'sqlite:///' + conn_str
 
-    engine, metadata = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)        
-    
-    Session = sessionmaker()
-    conn = engine.connect()
+    engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)        
+    conn = None
     try :
-        session = Session(bind=conn)
-        try : 
-        
-            metadata = MetaData(bind=conn)
+        conn = engine.connect()    
+        session = None
+        try :
+            session = Session(bind=conn)         
             tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
             #mapper(TweetExclude, tweet_exclude_table)
-            metadata.create_all()
+            metadata.create_all(bind=conn, tables=[tweet_exclude_table])
             
             if options.exclude and os.path.exists(options.exclude):
                 with open(options.exclude, 'r+') as f:
@@ -174,14 +172,16 @@
                     with open(user_whitelist_file, 'r+') as f:
                         user_whitelist = list(set([s.strip() for s in f]))
                 
-                start_date = parse_date(start_date_str) 
-                ts = time.mktime(start_date.timetuple())
+                start_date = None
+                ts = None
+                if start_date_str:
+                    start_date = parse_date(start_date_str) 
+                    ts = time.mktime(start_date.timetuple())
             
+                end_date = None
                 if end_date_str:
                     end_date = parse_date(end_date_str)
-                    te = time.mktime(end_date.timetuple())
-                else:
-                    te = ts + duration
+                elif start_date and duration:
                     end_date = start_date + datetime.timedelta(seconds=duration)
                 
                 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
@@ -276,6 +276,8 @@
                 for tw in query_res:
                     tweet_ts_dt = tw.created_at
                     tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+                    if ts is None:
+                        ts = tweet_ts
                     tweet_ts_rel = (tweet_ts-ts) * 1000
                     username = None
                     profile_url = ""
@@ -330,6 +332,8 @@
                     output.close()
                 
         finally:
-            session.close()
+            if session:
+                session.close()
     finally:
-        conn.close()
+        if conn:
+            conn.close()