diff -r 4c870c767d3e -r a5eff8f2b81d script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Tue Sep 20 16:55:16 2011 +0200 +++ b/script/lib/iri_tweet/utils.py Thu Sep 22 12:37:53 2011 +0200 @@ -497,14 +497,17 @@ parser.add_option("-q", dest="quiet", action="count", help="quiet", metavar="QUIET", default=0) +def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist): -def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist): + query = query.join(EntityHashtag).join(Hashtag) - query = session.query(Tweet).join(EntityHashtag).join(Hashtag) if tweet_exclude_table is not None: query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable - - query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date) + + if start_date: + query = query.filter(Tweet.created_at >= start_date) + if end_date: + query = query.filter(Tweet.created_at <= end_date) if user_whitelist: query = query.join(User).filter(User.screen_name.in_(user_whitelist)) @@ -517,26 +520,23 @@ htags = reduce(merge_hash, hashtags, []) query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable - return query + + + +def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist): + + query = session.query(Tweet) + query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) + return query.order_by(Tweet.created_at) def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table): - query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag) - if tweet_exclude_table is not None: - query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable - - query = query.filter(Tweet.created_at >= start_date).filter(Tweet.created_at <= end_date) + query = session.query(User).join(Tweet) - if hashtags : - def merge_hash(l,h): - l.extend(h.split(",")) - return l - htags = reduce(merge_hash, hashtags, []) - - query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable + query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None) return query.distinct()