--- a/script/lib/iri_tweet/export_twitter_alchemy.py Tue Sep 20 16:55:16 2011 +0200
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py Thu Sep 22 12:37:53 2011 +0200
@@ -2,19 +2,19 @@
# coding=utf-8
from lxml import etree
+from models import setup_database
from optparse import OptionParser #@UnresolvedImport
-from sqlalchemy import Table, Column, BigInteger, MetaData
-from sqlalchemy.orm import sessionmaker
-from utils import parse_date, set_logging_options, set_logging, get_filter_query, get_logger
-from models import setup_database
+from sqlalchemy import Table, Column, BigInteger
+from utils import (parse_date, set_logging_options, set_logging, get_filter_query,
+ get_logger)
+import anyjson
import datetime
+import httplib2
import os.path
import re
import sys
import time
import uuid #@UnresolvedImport
-import httplib2
-import anyjson
#class TweetExclude(object):
# def __init__(self, id):
@@ -57,9 +57,9 @@
parser.add_option("-d", "--database", dest="database",
help="Input database", metavar="DATABASE")
parser.add_option("-s", "--start-date", dest="start_date",
- help="start date", metavar="START_DATE")
+ help="start date", metavar="START_DATE", default=None)
parser.add_option("-e", "--end-date", dest="end_date",
- help="end date", metavar="END_DATE")
+ help="end date", metavar="END_DATE", default=None)
parser.add_option("-I", "--content-file", dest="content_file",
help="Content file", metavar="CONTENT_FILE")
parser.add_option("-c", "--content", dest="content",
@@ -110,18 +110,16 @@
if not re.match("^\w+://.+", conn_str):
conn_str = 'sqlite:///' + conn_str
- engine, metadata = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
-
- Session = sessionmaker()
- conn = engine.connect()
+ engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
+ conn = None
try :
- session = Session(bind=conn)
- try :
-
- metadata = MetaData(bind=conn)
+ conn = engine.connect()
+ session = None
+ try :
+ session = Session(bind=conn)
tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY'])
#mapper(TweetExclude, tweet_exclude_table)
- metadata.create_all()
+ metadata.create_all(bind=conn, tables=[tweet_exclude_table])
if options.exclude and os.path.exists(options.exclude):
with open(options.exclude, 'r+') as f:
@@ -174,14 +172,16 @@
with open(user_whitelist_file, 'r+') as f:
user_whitelist = list(set([s.strip() for s in f]))
- start_date = parse_date(start_date_str)
- ts = time.mktime(start_date.timetuple())
+ start_date = None
+ ts = None
+ if start_date_str:
+ start_date = parse_date(start_date_str)
+ ts = time.mktime(start_date.timetuple())
+ end_date = None
if end_date_str:
end_date = parse_date(end_date_str)
- te = time.mktime(end_date.timetuple())
- else:
- te = ts + duration
+ elif start_date and duration:
end_date = start_date + datetime.timedelta(seconds=duration)
query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
@@ -276,6 +276,8 @@
for tw in query_res:
tweet_ts_dt = tw.created_at
tweet_ts = int(time.mktime(tweet_ts_dt.timetuple()))
+ if ts is None:
+ ts = tweet_ts
tweet_ts_rel = (tweet_ts-ts) * 1000
username = None
profile_url = ""
@@ -330,6 +332,8 @@
output.close()
finally:
- session.close()
+ if session:
+ session.close()
finally:
- conn.close()
+ if conn:
+ conn.close()