script/utils/search_topsy.py
changeset 982 11c1322cffe6
parent 888 6fc6637d8403
child 1137 5c757e167687
equal deleted inserted replaced
981:5378e8a44db1 982:11c1322cffe6
     1 from blessings import Terminal
     1 import argparse
     2 from iri_tweet import models, utils
       
     3 from iri_tweet.processor import TwitterProcessorStatus
       
     4 from optparse import OptionParser
       
     5 import logging
     2 import logging
     6 import math
     3 import math
     7 import re
     4 import re
       
     5 import time
       
     6 
       
     7 from blessings import Terminal
     8 import requests
     8 import requests
     9 import time
       
    10 import twitter
     9 import twitter
       
    10 
       
    11 from iri_tweet import models, utils
       
    12 from iri_tweet.processor import TwitterProcessorStatus
       
    13 
    11 
    14 
    12 logger = logging.getLogger(__name__)
    15 logger = logging.getLogger(__name__)
    13 
    16 
    14 APPLICATION_NAME = "Tweet recorder user"
    17 APPLICATION_NAME = "Tweet recorder user"
    15 CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
       
    16 CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs"
       
    17 
    18 
    18 
    19 
    19 class TopsyResource(object):
    20 class TopsyResource(object):
    20     
    21     
    21     def __init__(self, query, **kwargs):
    22     def __init__(self, query, **kwargs):
    22                 
    23 
    23         self.options = kwargs
    24         self.options = kwargs
    24         self.options['q'] = query
    25         self.options['q'] = query
    25         self.url = kwargs.get("url", "http://otter.topsy.com/search.json")
    26         self.url = kwargs.get("url", "http://otter.topsy.com/search.json")
    26         self.page = 0
    27         self.page = 0
    27         self.req = None
    28         self.req = None
    30     def __initialize(self):
    31     def __initialize(self):
    31         
    32         
    32         params = {}
    33         params = {}
    33         params.update(self.options)
    34         params.update(self.options)
    34         self.req = requests.get(self.url, params=params)
    35         self.req = requests.get(self.url, params=params)
    35         self.res = self.req.json
    36         self.res = self.req.json()
    36         
    37         
    37     def __next_page(self):
    38     def __next_page(self):
    38         page = self.res.get("response").get("page") + 1
    39         page = self.res.get("response").get("page") + 1
    39         params = {}
    40         params = {}
    40         params.update(self.options)
    41         params.update(self.options)
    41         params['page'] = page
    42         params['page'] = page
    42         self.req = requests.get(self.url, params=params)
    43         self.req = requests.get(self.url, params=params)
    43         self.res = self.req.json
    44         self.res = self.req.json()
    44 
    45 
    45     def __iter__(self):        
    46     def __iter__(self):        
    46         if not self.req:
    47         if not self.req:
    47             self.__initialize()
    48             self.__initialize()
    48         while "response" in self.res and "list" in self.res.get("response") and self.res.get("response").get("list"):
    49         while "response" in self.res and "list" in self.res.get("response") and self.res.get("response").get("list"):
    56         else:
    57         else:
    57             return self.res.get("response",{}).get("total",0)
    58             return self.res.get("response",{}).get("total",0)
    58             
    59             
    59 
    60 
    60 
    61 
    61 def get_option():
    62 def get_options():
    62     
    63     
    63     parser = OptionParser()
    64     usage = "usage: %(prog)s [options] <connection_str_or_filepath>"
       
    65     
       
    66     parser = argparse.ArgumentParser(usage=usage)
    64 
    67 
    65     parser.add_option("-d", "--database", dest="database",
    68     parser.add_argument(dest="conn_str",
    66                       help="Input database", metavar="DATABASE")
    69                         help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
    67     parser.add_option("-Q", dest="query",
    70     parser.add_argument("-Q", dest="query",
    68                       help="query", metavar="QUERY")
    71                       help="query", metavar="QUERY")
    69     parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
    72     parser.add_argument("-k", "--key", dest="consumer_key",
       
    73                         help="Twitter consumer key", metavar="CONSUMER_KEY")
       
    74     parser.add_argument("-s", "--secret", dest="consumer_secret",
       
    75                         help="Twitter consumer secret", metavar="CONSUMER_SECRET")
       
    76     parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
    70                       help="Token file name")
    77                       help="Token file name")
    71     parser.add_option("-T", dest="topsy_apikey", metavar="TOPSY_APIKEY", default=None,
    78     parser.add_argument("-T", dest="topsy_apikey", metavar="TOPSY_APIKEY", default=None,
    72                       help="Topsy apikey")
    79                       help="Topsy apikey")
    73     
    80 
    74     utils.set_logging_options(parser)
    81     utils.set_logging_options(parser)
    75 
    82 
    76     return parser.parse_args()
    83     return parser.parse_args()
    77 
    84 
    78 
    85 
    79 
    86 
    80 if __name__ == "__main__":
    87 if __name__ == "__main__":
    81 
    88 
    82     (options, args) = get_option()
    89     options = get_options()
    83     
    90     
    84     utils.set_logging(options);
    91     utils.set_logging(options);
    85 
    92 
    86 
    93 
    87     acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET, options.token_filename, application_name=APPLICATION_NAME)
    94     acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
    88 
    95 
    89     t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET), secure=True)
    96     t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True)
    90     t.secure = True
    97     t.secure = True
    91     
    98     
    92     conn_str = options.database.strip()
    99     conn_str = options.conn_str.strip()
    93     if not re.match("^\w+://.+", conn_str):
   100     if not re.match("^\w+://.+", conn_str):
    94         conn_str = 'sqlite:///' + conn_str
   101         conn_str = 'sqlite:///' + conn_str
    95     
   102     
    96     engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
   103     engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
    97     session = None
   104     session = None
   144             
   151             
   145             processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
   152             processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
   146             processor.process()
   153             processor.process()
   147             session.flush()
   154             session.flush()
   148             session.commit()
   155             session.commit()
   149                         
   156 
   150             time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining))
   157             time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining))
   151             
   158             
   152             print "rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('x-ratelimit-limit'))) + term.clear_eol()
   159             print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol())
   153             move_up += 1
   160             move_up += 1
   154             for i in xrange(time_to_sleep):
   161             for i in xrange(time_to_sleep):
   155                 if i:
   162                 if i:
   156                     print(2*term.move_up())
   163                     print(2*term.move_up())
   157                 else:
   164                 else: