495 parser.add_option("-v", dest="verbose", action="count", |
495 parser.add_option("-v", dest="verbose", action="count", |
496 help="verbose", metavar="VERBOSE", default=0) |
496 help="verbose", metavar="VERBOSE", default=0) |
497 parser.add_option("-q", dest="quiet", action="count", |
497 parser.add_option("-q", dest="quiet", action="count", |
498 help="quiet", metavar="QUIET", default=0) |
498 help="quiet", metavar="QUIET", default=0) |
499 |
499 |
500 |
def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
    """Apply the shared tweet filters to an existing query and return it.

    The given ``query`` is first joined to ``EntityHashtag`` and ``Hashtag``;
    each optional criterion below is then applied only when it is supplied.

    :param session: accepted for signature symmetry with the other query
        builders; this function does not use it.
    :param query: the query to extend (the callers in this module pass a
        query that already involves ``Tweet``).
    :param start_date: if truthy, keep tweets with ``created_at >= start_date``.
    :param end_date: if truthy, keep tweets with ``created_at <= end_date``.
    :param hashtags: iterable of strings, each of which may hold several
        comma-separated values; a row is kept when ``Hashtag.text`` contains
        at least one of the values.
    :param tweet_exclude_table: object exposing a ``c.id`` column; tweets
        whose id is selected from it are excluded. ``None`` disables this.
    :param user_whitelist: if truthy, join ``User`` and keep only rows whose
        ``User.screen_name`` is in this collection.
    :return: the filtered query.
    """
    # NOTE(review): the hashtag joins are applied even when ``hashtags`` is
    # empty; if these are inner joins, tweets without any hashtag entity are
    # dropped from the result -- confirm this is intended.
    query = query.join(EntityHashtag).join(Hashtag)

    if tweet_exclude_table is not None:
        query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable

    # Each date bound is optional and inclusive.
    if start_date:
        query = query.filter(Tweet.created_at >= start_date)
    if end_date:
        query = query.filter(Tweet.created_at <= end_date)

    if user_whitelist:
        query = query.join(User).filter(User.screen_name.in_(user_whitelist))

    if hashtags:
        # Flatten the comma-separated entries into one list of values.
        htags = [tag for entry in hashtags for tag in entry.split(",")]
        # OR together one "contains" test per value.
        query = query.filter(or_(*[Hashtag.text.contains(tag) for tag in htags])) #@UndefinedVariable

    return query
|
525 |
|
526 |
|
527 |
|
def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
    """Build the filtered ``Tweet`` query, ordered by ``Tweet.created_at``.

    Starts from ``session.query(Tweet)``, hands it to ``get_base_query``
    together with all the filter arguments, and orders the result by
    creation date.

    :param session: database session used to create the ``Tweet`` query.
    :param start_date: lower bound forwarded to ``get_base_query``.
    :param end_date: upper bound forwarded to ``get_base_query``.
    :param hashtags: hashtag values forwarded to ``get_base_query``.
    :param tweet_exclude_table: exclusion table forwarded to
        ``get_base_query`` (may be ``None``).
    :param user_whitelist: screen-name whitelist forwarded to
        ``get_base_query``.
    :return: the filtered query ordered by ``Tweet.created_at``.
    """
    query = get_base_query(session, session.query(Tweet), start_date, end_date, hashtags, tweet_exclude_table, user_whitelist)
    return query.order_by(Tweet.created_at)
523 |
533 |
524 |
534 |
def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
    """Build a query for the distinct ``User`` rows behind the filtered tweets.

    Starts from ``session.query(User).join(Tweet)``, hands it to
    ``get_base_query`` with the date, hashtag and exclusion arguments, and
    removes duplicate rows with ``distinct()``.

    :param session: database session used to create the ``User`` query.
    :param start_date: lower bound forwarded to ``get_base_query``.
    :param end_date: upper bound forwarded to ``get_base_query``.
    :param hashtags: hashtag values forwarded to ``get_base_query``.
    :param tweet_exclude_table: exclusion table forwarded to
        ``get_base_query`` (may be ``None``).
    :return: the filtered query with ``distinct()`` applied.
    """
    query = session.query(User).join(Tweet)
    # No user whitelist is applied here, hence the trailing ``None``.
    query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)
    return query.distinct()
542 |
542 |
# Module-level logger name constant; presumably handed to
# logging.getLogger() elsewhere in the file -- confirm.
logger_name = "iri.tweet"
544 |
544 |