diff -r ef088e58ae07 -r 210dc265c70f script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Fri Mar 18 12:23:02 2011 +0100 +++ b/script/lib/iri_tweet/utils.py Fri Mar 18 15:45:49 2011 +0100 @@ -1,4 +1,5 @@ from models import * +from sqlalchemy.sql import select, or_ import anyjson import datetime import email.utils @@ -11,26 +12,26 @@ import twitter_text -CACHE_ACCESS_TOKEN = None +CACHE_ACCESS_TOKEN = {} -def get_oauth_token(token_file_path=None): +def get_oauth_token(token_file_path=None, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET): global CACHE_ACCESS_TOKEN - if CACHE_ACCESS_TOKEN is not None: - return CACHE_ACCESS_TOKEN + if CACHE_ACCESS_TOKEN is not None and application_name in CACHE_ACCESS_TOKEN: + return CACHE_ACCESS_TOKEN[application_name] if token_file_path and os.path.exists(token_file_path): logging.debug("reading token from file %s" % token_file_path) - CACHE_ACCESS_TOKEN = twitter.oauth.read_token_file(token_file_path) - return CACHE_ACCESS_TOKEN + CACHE_ACCESS_TOKEN[application_name] = twitter.oauth.read_token_file(token_file_path) + return CACHE_ACCESS_TOKEN[application_name] #read access token info from path if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET: return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET - CACHE_ACCESS_TOKEN = twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path) - return CACHE_ACCESS_TOKEN + CACHE_ACCESS_TOKEN[application_name] = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path) + return CACHE_ACCESS_TOKEN[application_name] def parse_date(date_str): ts = email.utils.parsedate_tz(date_str) @@ -216,7 +217,7 @@ # get or create user user = self.__get_user(self.json_dict["user"]) if user is None: - logging.warning("USER not found " + repr(ts["user"])) + logging.warning("USER not found " + repr(self.json_dict["user"])) ts_copy["user"] = None ts_copy["user_id"] = None else: @@ -321,3 +322,43 @@ help="verbose", metavar="VERBOSE", default=0) parser.add_option("-q", dest="quiet", action="count", help="quiet", metavar="QUIET", default=0) + + +def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table): + + query = session.query(Tweet).join(EntityHashtag).join(Hashtag) + if tweet_exclude_table is not None: + query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) + + query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date) + + if hashtags : + def merge_hash(l,h): + l.extend(h.split(",")) + return l + htags = reduce(merge_hash, hashtags, []) + + query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) + + return query + + +def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table): + + query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag) + if tweet_exclude_table is not None: + query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) + + query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date) + + if hashtags : + def merge_hash(l,h): + l.extend(h.split(",")) + return l + htags = reduce(merge_hash, hashtags, []) + + query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) + + return query.distinct() + +