--- a/script/lib/iri_tweet/export_twitter_alchemy.py Fri Mar 18 12:23:02 2011 +0100
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py Fri Mar 18 15:45:49 2011 +0100
@@ -7,7 +7,7 @@
from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
ForeignKey
from sqlalchemy.orm import sessionmaker, mapper
-from sqlalchemy.sql import select
+from sqlalchemy.sql import select, or_
from utils import *
import datetime
import email.utils
@@ -83,7 +83,7 @@
parser.add_option("-C", "--color", dest="color",
help="Color code", metavar="COLOR", default="16763904")
parser.add_option("-H", "--hashtag", dest="hashtag",
- help="Hashtag", metavar="HASHTAG", default="enmi")
+ help="Hashtag", metavar="HASHTAG", default=[], action="append")
parser.add_option("-D", "--duration", dest="duration", type="int",
help="Duration", metavar="DURATION", default=None)
parser.add_option("-n", "--name", dest="name",
@@ -144,14 +144,18 @@
params['end_date'] = snode.text
elif snode.tag == "duration":
params['duration'] = int(snode.text)
+ elif snode.tag == "hashtags":
+ params['hashtags'] = [snode.text]
+ if options.hashtag or 'hashtags' not in params :
+ params['hashtags'] = options.hashtag
parameters.append(params)
else:
parameters = [{
'start_date': options.start_date,
'end_date' : options.end_date,
'duration' : options.duration,
- 'content_file' : options.content_file
-
+ 'content_file' : options.content_file,
+ 'hashtags' : options.hashtag
}]
for params in parameters:
@@ -162,6 +166,7 @@
end_date_str = params.get("end_date", None)
duration = params.get("duration", None)
content_file = params.get("content_file", None)
+ hashtags = params.get('hashtags', [])
start_date = parse_date(start_date_str)
@@ -173,9 +178,10 @@
else:
te = ts + duration
end_date = start_date + datetime.timedelta(seconds=duration)
-
-
- query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date).all()
+
+ query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table)
+
+ query_res = query.all()
root = None
ensemble_parent = None
@@ -310,7 +316,7 @@
dest_file_name = options.filename
logging.debug("WRITE : " + dest_file_name)
- output = open(content_file, "w")
+ output = open(dest_file_name, "w")
output.write(output_data)
output.flush()
output.close()