diff -r ef088e58ae07 -r 210dc265c70f script/lib/iri_tweet/export_twitter_alchemy.py --- a/script/lib/iri_tweet/export_twitter_alchemy.py Fri Mar 18 12:23:02 2011 +0100 +++ b/script/lib/iri_tweet/export_twitter_alchemy.py Fri Mar 18 15:45:49 2011 +0100 @@ -7,7 +7,7 @@ from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \ ForeignKey from sqlalchemy.orm import sessionmaker, mapper -from sqlalchemy.sql import select +from sqlalchemy.sql import select, or_ from utils import * import datetime import email.utils @@ -83,7 +83,7 @@ parser.add_option("-C", "--color", dest="color", help="Color code", metavar="COLOR", default="16763904") parser.add_option("-H", "--hashtag", dest="hashtag", - help="Hashtag", metavar="HASHTAG", default="enmi") + help="Hashtag", metavar="HASHTAG", default=[], action="append") parser.add_option("-D", "--duration", dest="duration", type="int", help="Duration", metavar="DURATION", default=None) parser.add_option("-n", "--name", dest="name", @@ -144,14 +144,18 @@ params['end_date'] = snode.text elif snode.tag == "duration": params['duration'] = int(snode.text) + elif snode.tag == "hashtags": + params['hashtags'] = [snode.text] + if options.hashtag or 'hashtags' not in params : + params['hashtags'] = options.hashtag parameters.append(params) else: parameters = [{ 'start_date': options.start_date, 'end_date' : options.end_date, 'duration' : options.duration, - 'content_file' : options.content_file - + 'content_file' : options.content_file, + 'hashtags' : options.hashtag }] for params in parameters: @@ -162,6 +166,7 @@ end_date_str = params.get("end_date", None) duration = params.get("duration", None) content_file = params.get("content_file", None) + hashtags = params.get('hashtags', []) start_date = parse_date(start_date_str) @@ -173,9 +178,10 @@ else: te = ts + duration end_date = start_date + datetime.timedelta(seconds=duration) - - - query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date).all() + + query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table) + + query_res = query.all() root = None ensemble_parent = None @@ -310,7 +316,7 @@ dest_file_name = options.filename logging.debug("WRITE : " + dest_file_name) - output = open(content_file, "w") + output = open(dest_file_name, "w") output.write(output_data) output.flush() output.close()