script/lib/iri_tweet/utils.py
changeset 82 210dc265c70f
parent 32 c924e143576f
child 98 6e8930a1b8f7
equal deleted inserted replaced
81:ef088e58ae07 82:210dc265c70f
     1 from models import *
     1 from models import *
       
     2 from sqlalchemy.sql import select, or_
     2 import anyjson
     3 import anyjson
     3 import datetime
     4 import datetime
     4 import email.utils
     5 import email.utils
     5 import logging
     6 import logging
     6 import os.path
     7 import os.path
     9 import twitter.oauth
    10 import twitter.oauth
    10 import twitter.oauth_dance
    11 import twitter.oauth_dance
    11 import twitter_text
    12 import twitter_text
    12 
    13 
    13 
    14 
    14 CACHE_ACCESS_TOKEN = None
    15 CACHE_ACCESS_TOKEN = {}
    15 
    16 
    16 def get_oauth_token(token_file_path=None):
    17 def get_oauth_token(token_file_path=None, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET):
    17     
    18     
    18     global CACHE_ACCESS_TOKEN
    19     global CACHE_ACCESS_TOKEN
    19     
    20     
    20     if CACHE_ACCESS_TOKEN is not None:
    21     if CACHE_ACCESS_TOKEN is not None and application_name in CACHE_ACCESS_TOKEN:
    21         return CACHE_ACCESS_TOKEN
    22         return CACHE_ACCESS_TOKEN[application_name]
    22     
    23     
    23     if token_file_path and os.path.exists(token_file_path):
    24     if token_file_path and os.path.exists(token_file_path):
    24         logging.debug("reading token from file %s" % token_file_path)
    25         logging.debug("reading token from file %s" % token_file_path)
    25         CACHE_ACCESS_TOKEN = twitter.oauth.read_token_file(token_file_path)
    26         CACHE_ACCESS_TOKEN[application_name] = twitter.oauth.read_token_file(token_file_path)
    26         return CACHE_ACCESS_TOKEN
    27         return CACHE_ACCESS_TOKEN[application_name]
    27         #read access token info from path
    28         #read access token info from path
    28     
    29     
    29     if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
    30     if 'ACCESS_TOKEN_KEY' in dict() and 'ACCESS_TOKEN_SECRET' in dict() and ACCESS_TOKEN_KEY and ACCESS_TOKEN_SECRET:
    30         return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
    31         return ACCESS_TOKEN_KEY,ACCESS_TOKEN_SECRET
    31     
    32     
    32     CACHE_ACCESS_TOKEN = twitter.oauth_dance.oauth_dance(APPLICATION_NAME, CONSUMER_KEY, CONSUMER_SECRET, token_file_path)
    33     CACHE_ACCESS_TOKEN[application_name] = twitter.oauth_dance.oauth_dance(application_name, consumer_key, consumer_secret, token_file_path)
    33     return CACHE_ACCESS_TOKEN
    34     return CACHE_ACCESS_TOKEN[application_name]
    34 
    35 
    35 def parse_date(date_str):
    36 def parse_date(date_str):
    36     ts = email.utils.parsedate_tz(date_str)
    37     ts = email.utils.parsedate_tz(date_str)
    37     return datetime.datetime(*ts[0:7])
    38     return datetime.datetime(*ts[0:7])
    38 
    39 
   214         ts_copy = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"])
   215         ts_copy = adapt_fields(self.json_dict, fields_adapter["stream"]["tweet"])
   215         
   216         
   216         # get or create user
   217         # get or create user
   217         user = self.__get_user(self.json_dict["user"])
   218         user = self.__get_user(self.json_dict["user"])
   218         if user is None:
   219         if user is None:
   219             logging.warning("USER not found " + repr(ts["user"]))
   220             logging.warning("USER not found " + repr(self.json_dict["user"]))
   220             ts_copy["user"] = None
   221             ts_copy["user"] = None
   221             ts_copy["user_id"] = None
   222             ts_copy["user_id"] = None
   222         else:
   223         else:
   223             ts_copy["user"] = user
   224             ts_copy["user"] = user
   224             ts_copy["user_id"] = ts_copy["user"].id
   225             ts_copy["user_id"] = ts_copy["user"].id
   319                       help="log to file", metavar="LOG", default="stderr")
   320                       help="log to file", metavar="LOG", default="stderr")
   320     parser.add_option("-v", dest="verbose", action="count",
   321     parser.add_option("-v", dest="verbose", action="count",
   321                       help="verbose", metavar="VERBOSE", default=0)
   322                       help="verbose", metavar="VERBOSE", default=0)
   322     parser.add_option("-q", dest="quiet", action="count",
   323     parser.add_option("-q", dest="quiet", action="count",
   323                       help="quiet", metavar="QUIET", default=0)
   324                       help="quiet", metavar="QUIET", default=0)
       
   325 
       
   326     
       
   327 def get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table):
       
   328     
       
   329     query = session.query(Tweet).join(EntityHashtag).join(Hashtag)
       
   330     if tweet_exclude_table is not None:
       
   331         query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
       
   332         
       
   333     query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
       
   334     
       
   335     if hashtags :
       
   336         def merge_hash(l,h):
       
   337             l.extend(h.split(","))
       
   338             return l
       
   339         htags = reduce(merge_hash, hashtags, [])
       
   340         
       
   341         query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags)))
       
   342     
       
   343     return query
       
   344     
       
   345 
       
   346 def get_user_query(session, start_date, end_date, hashtags, tweet_exclude_table):
       
   347     
       
   348     query = session.query(User).join(Tweet).join(EntityHashtag).join(Hashtag)
       
   349     if tweet_exclude_table is not None:
       
   350         query = query.filter(~Tweet.id.in_(select([tweet_exclude_table.c.id])))
       
   351         
       
   352     query = query.filter(Tweet.created_at >=  start_date).filter(Tweet.created_at <=  end_date)
       
   353     
       
   354     if hashtags :
       
   355         def merge_hash(l,h):
       
   356             l.extend(h.split(","))
       
   357             return l
       
   358         htags = reduce(merge_hash, hashtags, [])
       
   359         
       
   360         query = query.filter(or_(*map(lambda h: Hashtag.text.contains(h), htags)))
       
   361     
       
   362     return query.distinct()
       
   363 
       
   364