script/lib/iri_tweet/utils.py
changeset 289 a5eff8f2b81d
parent 255 500cd0405c7a
child 409 f7ceddf99d6d
equal deleted inserted replaced
288:4c870c767d3e 289:a5eff8f2b81d
   495     parser.add_option("-v", dest="verbose", action="count",
   495     parser.add_option("-v", dest="verbose", action="count",
   496                       help="verbose", metavar="VERBOSE", default=0)
   496                       help="verbose", metavar="VERBOSE", default=0)
   497     parser.add_option("-q", dest="quiet", action="count",
   497     parser.add_option("-q", dest="quiet", action="count",
   498                       help="quiet", metavar="QUIET", default=0)
   498                       help="quiet", metavar="QUIET", default=0)
   499 
   499 
   500     
   500 def get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
   501 def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
   501     
   502     
   502     query = query.join(EntityHashtag).join(Hashtag)
   503     query = session.query(Tweet).join(EntityHashtag).join(Hashtag)
   503     
   504     if tweet_exclude_table is not None:
   504     if tweet_exclude_table is not None:
   505         query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
   505         query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
   506         
   506     
   507     query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
   507     if start_date:
       
   508         query = query.filter(Tweet.created_at >=  start_date)
       
   509     if end_date:
       
   510         query = query.filter(Tweet.created_at <=  end_date)
   508 
   511 
   509     if user_whitelist:
   512     if user_whitelist:
   510         query = query.join(User).filter(User.screen_name.in_(user_whitelist))
   513         query = query.join(User).filter(User.screen_name.in_(user_whitelist))
   511 
   514 
   512     
   515     
   515             l.extend(h.split(","))
   518             l.extend(h.split(","))
   516             return l
   519             return l
   517         htags = reduce(merge_hash, hashtags, [])
   520         htags = reduce(merge_hash, hashtags, [])
   518         
   521         
   519         query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
   522         query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
   520         
       
   521     
   523     
   522     return query
   524     return query
       
   525 
       
   526     
       
   527     
       
   528 def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist):
       
   529     
       
   530     query = session.query(Tweet)
       
   531     query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) 
       
   532     return query.order_by(Tweet.created_at)
   523     
   533     
   524 
   534 
   525 def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
   535 def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
   526     
   536     
   527     query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag)
   537     query = session.query(User).join(Tweet)
   528     if tweet_exclude_table is not None:
   538     
   529         query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id]))) #@UndefinedVariable
   539     query = get_base_query(session, query, start_date, end_date, hashtags, tweet_exclude_table, None)    
   530         
       
   531     query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
       
   532     
       
   533     if hashtags :
       
   534         def merge_hash(l,h):
       
   535             l.extend(h.split(","))
       
   536             return l
       
   537         htags = reduce(merge_hash, hashtags, [])
       
   538         
       
   539         query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags))) #@UndefinedVariable
       
   540     
   540     
   541     return query.distinct()
   541     return query.distinct()
   542 
   542 
   543 logger_name = "iri.tweet"
   543 logger_name = "iri.tweet"
   544 
   544