script/lib/iri_tweet/utils.py
changeset 289 a5eff8f2b81d
parent 255 500cd0405c7a
child 409 f7ceddf99d6d
--- a/script/lib/iri_tweet/utils.py	Tue Sep 20 16:55:16 2011 +0200
+++ b/script/lib/iri_tweet/utils.py	Thu Sep 22 12:37:53 2011 +0200
@@ -497,14 +497,17 @@
     parser.add_option("-q", dest="quiet", action="count",
                       help="quiet", metavar="QUIET", default=0)
 
+def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
     
-def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
+    query = query.join(EntityHashtag).join(Hashtag)
     
-    query = session.query(Tweet).join(EntityHashtag).join(Hashtag)
     if tweet_exclude_table is not None:
         query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
-        
-    query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
+    
+    if start_date:
+        query = query.filter(Tweet.created_at >=  start_date)
+    if end_date:
+        query = query.filter(Tweet.created_at <=  end_date)
 
     if user_whitelist:
         query = query.join(User).filter(User.screen_name.in_(user_whitelist))
@@ -517,26 +520,23 @@
         htags = reduce(merge_hash, hashtags, [])
         
         query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
-        
     
     return query
+
+    
+    
+def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
+    
+    query = session.query(Tweet)
+    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) 
+    return query.order_by(Tweet.created_at)
     
 
 def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
     
-    query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag)
-    if tweet_exclude_table is not None:
-        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
-        
-    query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
+    query = session.query(User).join(Tweet)
     
-    if hashtags :
-        def merge_hash(l,h):
-            l.extend(h.split(","))
-            return l
-        htags = reduce(merge_hash, hashtags, [])
-        
-        query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
+    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)    
     
     return query.distinct()