script/lib/iri_tweet/export_twitter_alchemy.py
changeset 82 210dc265c70f
parent 39 c5d7dd0ec4e1
child 83 4a759c70e40f
--- a/script/lib/iri_tweet/export_twitter_alchemy.py	Fri Mar 18 12:23:02 2011 +0100
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py	Fri Mar 18 15:45:49 2011 +0100
@@ -7,7 +7,7 @@
 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \
     ForeignKey
 from sqlalchemy.orm import sessionmaker, mapper
-from sqlalchemy.sql import select
+from sqlalchemy.sql import select, or_
 from utils import *
 import datetime
 import email.utils
@@ -83,7 +83,7 @@
     parser.add_option("-C", "--color", dest="color",
                       help="Color code", metavar="COLOR", default="16763904")
     parser.add_option("-H", "--hashtag", dest="hashtag",
-                      help="Hashtag", metavar="HASHTAG", default="enmi")                      
+                      help="Hashtag", metavar="HASHTAG", default=[], action="append")                      
     parser.add_option("-D", "--duration", dest="duration", type="int",
                       help="Duration", metavar="DURATION", default=None)
     parser.add_option("-n", "--name", dest="name",
@@ -144,14 +144,18 @@
                             params['end_date'] = snode.text
                         elif snode.tag == "duration":
                             params['duration'] = int(snode.text)
+                        elif snode.tag == "hashtags":
+                            params['hashtags'] = [snode.text]
+                    if options.hashtag or 'hashtags' not in params :
+                        params['hashtags'] = options.hashtag
                     parameters.append(params)
             else:                        
                 parameters = [{
                     'start_date': options.start_date,
                     'end_date' : options.end_date,
                     'duration' : options.duration,
-                    'content_file' : options.content_file
-                    
+                    'content_file' : options.content_file,
+                    'hashtags' : options.hashtag
                 }]
             
             for params in parameters:
@@ -162,6 +166,7 @@
                 end_date_str = params.get("end_date", None)
                 duration = params.get("duration", None)
                 content_file = params.get("content_file", None)
+                hashtags = params.get('hashtags', [])
                 
                 
                 start_date = parse_date(start_date_str) 
@@ -173,9 +178,10 @@
                 else:
                     te = ts + duration
                     end_date = start_date + datetime.timedelta(seconds=duration)
-        
-            
-                query_res = session.query(Tweet).join(EntityHashtag).join(Hashtag).filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))).filter(Hashtag.text.contains(options.hashtag)).filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date).all()
+                
+                query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table)
+                    
+                query_res = query.all()
                                  
                 root = None
                 ensemble_parent = None
@@ -310,7 +316,7 @@
                         dest_file_name = options.filename
             
                     logging.debug("WRITE : " + dest_file_name)
-                    output = open(content_file, "w")
+                    output = open(dest_file_name, "w")
                     output.write(output_data)
                     output.flush()
                     output.close()