script/lib/iri_tweet/utils.py
changeset 82 210dc265c70f
parent 32 c924e143576f
child 98 6e8930a1b8f7
--- a/script/lib/iri_tweet/utils.py	Fri Mar 18 12:23:02 2011 +0100
+++ b/script/lib/iri_tweet/utils.py	Fri Mar 18 15:45:49 2011 +0100
@@ -1,4 +1,5 @@
 from models import *
+from sqlalchemy.sql import select, or_
 import anyjson
 import datetime
 import email.utils
@@ -11,26 +12,26 @@
 import twitter_text
 
 
-CACHE_ACCESS_TOKEN = None
+CACHE_ACCESS_TOKEN = {}
 
-def get_oauth_token(token_file_path=None):
-    
+def get_oauth_token(token_file_path=None, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET):
+    """Return the OAuth access token for application_name, caching it per application name."""
     global CACHE_ACCESS_TOKEN
-    
-    if CACHE_ACCESS_TOKEN is not None:
-        return CACHE_ACCESS_TOKEN
-    
+    # 1. token already cached for this application
+    if CACHE_ACCESS_TOKEN is not None and application_name in CACHE_ACCESS_TOKEN:
+        return CACHE_ACCESS_TOKEN[application_name]
+    # 2. token stored in the file at token_file_path, when that file exists
     if token_file_path and os.path.exists(token_file_path):
         logging.debug("reading token from file %s" % token_file_path)
-        CACHE_ACCESS_TOKEN = twitter.oauth.read_token_file(token_file_path)
-        return CACHE_ACCESS_TOKEN
+        CACHE_ACCESS_TOKEN[application_name] = twitter.oauth.read_token_file(token_file_path)
+        return CACHE_ACCESS_TOKEN[application_name]
         #read access token info from path
-    
-    if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
+    # 3. module-level credentials (returned uncached); globals() instead of an empty dict() keeps this branch reachable
+    if 'ACCESS_TOKEN_KEY' in globals() and 'ACCESS_TOKEN_SECRET' in globals() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
         return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
-    
-    CACHE_ACCESS_TOKEN = twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path)
-    return CACHE_ACCESS_TOKEN
+    # 4. otherwise call oauth_dance with the given application credentials and cache its result
+    CACHE_ACCESS_TOKEN[application_name] = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path)
+    return CACHE_ACCESS_TOKEN[application_name]
 
 def parse_date(date_str):
     ts = email.utils.parsedate_tz(date_str)
@@ -216,7 +217,7 @@
         # get or create user
         user = self.__get_user(self.json_dict["user"])
         if user is None:
-            logging.warning("USER not found " + repr(ts["user"]))
+            logging.warning("USER not found " + repr(self.json_dict["user"]))
             ts_copy["user"] = None
             ts_copy["user_id"] = None
         else:
@@ -321,3 +322,43 @@
                       help="verbose", metavar="VERBOSE", default=0)
     parser.add_option("-q", dest="quiet", action="count",
                       help="quiet", metavar="QUIET", default=0)
+
+    
+def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table):
+    """Build the Tweet query restricted by date range, hashtags and exclusion table."""
+    tweets = session.query(Tweet).join(EntityHashtag).join(Hashtag)
+    if tweet_exclude_table is not None:
+        # drop every tweet whose id is listed in the exclusion table
+        excluded_ids = select([tweet_exclude_table.c.id])
+        tweets = tweets.filter(~Tweet.id.in_(excluded_ids))
+    # keep only tweets with start_date <= created_at <= end_date
+    tweets = tweets.filter(Tweet.created_at >= start_date)
+    tweets = tweets.filter(Tweet.created_at <= end_date)
+    if not hashtags:
+        return tweets
+    # each entry of hashtags is split on "," so one entry may carry several tags
+    tag_list = [tag for entry in hashtags for tag in entry.split(",")]
+    # one contains() test per requested tag, OR-ed together
+    tag_clauses = [Hashtag.text.contains(tag) for tag in tag_list]
+    return tweets.filter(or_(*tag_clauses))
+    
+
+def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
+    """Build the distinct User query for tweets matching the dates, hashtags and exclusions."""
+    users = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag)
+    if tweet_exclude_table is not None:
+        # ignore tweets whose id appears in the exclusion table
+        users = users.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
+    # both date bounds are inclusive (>= start_date, <= end_date)
+    users = users.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date)
+    if hashtags:
+        # flatten the comma-separated entries into a single list of tags
+        wanted = []
+        for entry in hashtags:
+            wanted.extend(entry.split(","))
+        # OR together one contains() test per requested tag
+        users = users.filter(or_(*[Hashtag.text.contains(tag) for tag in wanted]))
+    # presumably the joins can repeat a user once per matching tweet, hence distinct()
+    return users.distinct()
+
+