diff -r 4c870c767d3e -r a5eff8f2b81d script/lib/iri_tweet/export_twitter_alchemy.py --- a/script/lib/iri_tweet/export_twitter_alchemy.py Tue Sep 20 16:55:16 2011 +0200 +++ b/script/lib/iri_tweet/export_twitter_alchemy.py Thu Sep 22 12:37:53 2011 +0200 @@ -2,19 +2,19 @@ # coding=utf-8 from lxml import etree +from models import setup_database from optparse import OptionParser #@UnresolvedImport -from sqlalchemy import Table, Column, BigInteger, MetaData -from sqlalchemy.orm import sessionmaker -from utils import parse_date, set_logging_options, set_logging, get_filter_query, get_logger -from models import setup_database +from sqlalchemy import Table, Column, BigInteger +from utils import (parse_date, set_logging_options, set_logging, get_filter_query, + get_logger) +import anyjson import datetime +import httplib2 import os.path import re import sys import time import uuid #@UnresolvedImport -import httplib2 -import anyjson #class TweetExclude(object): # def __init__(self, id): @@ -57,9 +57,9 @@ parser.add_option("-d", "--database", dest="database", help="Input database", metavar="DATABASE") parser.add_option("-s", "--start-date", dest="start_date", - help="start date", metavar="START_DATE") + help="start date", metavar="START_DATE", default=None) parser.add_option("-e", "--end-date", dest="end_date", - help="end date", metavar="END_DATE") + help="end date", metavar="END_DATE", default=None) parser.add_option("-I", "--content-file", dest="content_file", help="Content file", metavar="CONTENT_FILE") parser.add_option("-c", "--content", dest="content", @@ -110,18 +110,16 @@ if not re.match("^\w+://.+", conn_str): conn_str = 'sqlite:///' + conn_str - engine, metadata = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) - - Session = sessionmaker() - conn = engine.connect() + engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) + conn = None try : - session = Session(bind=conn) - try : - - metadata = MetaData(bind=conn) + conn = engine.connect() + session = None + try : + session = Session(bind=conn) tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) #mapper(TweetExclude, tweet_exclude_table) - metadata.create_all() + metadata.create_all(bind=conn, tables=[tweet_exclude_table]) if options.exclude and os.path.exists(options.exclude): with open(options.exclude, 'r+') as f: @@ -174,14 +172,16 @@ with open(user_whitelist_file, 'r+') as f: user_whitelist = list(set([s.strip() for s in f])) - start_date = parse_date(start_date_str) - ts = time.mktime(start_date.timetuple()) + start_date = None + ts = None + if start_date_str: + start_date = parse_date(start_date_str) + ts = time.mktime(start_date.timetuple()) + end_date = None if end_date_str: end_date = parse_date(end_date_str) - te = time.mktime(end_date.timetuple()) - else: - te = ts + duration + elif start_date and duration: end_date = start_date + datetime.timedelta(seconds=duration) query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) @@ -276,6 +276,8 @@ for tw in query_res: tweet_ts_dt = tw.created_at tweet_ts = int(time.mktime(tweet_ts_dt.timetuple())) + if ts is None: + ts = tweet_ts tweet_ts_rel = (tweet_ts-ts) * 1000 username = None profile_url = "" @@ -330,6 +332,8 @@ output.close() finally: - session.close() + if session: + session.close() finally: - conn.close() + if conn: + conn.close()