--- a/script/utils/search_topsy.py Wed Jan 02 17:49:19 2019 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,181 +0,0 @@
-import argparse
-import logging
-import math
-import re
-import time
-
-from blessings import Terminal
-import requests
-import twitter
-
-from iri_tweet import models, utils
-from iri_tweet.processor import TwitterProcessorStatus
-
-
-logger = logging.getLogger(__name__)
-
-APPLICATION_NAME = "Tweet recorder user"
-
-
-class TopsyResource(object):
-
- def __init__(self, query, **kwargs):
-
- self.options = kwargs
- self.options['q'] = query
- self.url = kwargs.get("url", "http://otter.topsy.com/search.json")
- self.page = 0
- self.req = None
- self.res = {}
-
- def __initialize(self):
-
- params = {}
- params.update(self.options)
- self.req = requests.get(self.url, params=params)
- self.res = self.req.json()
-
- def __next_page(self):
- page = self.res.get("response").get("page") + 1
- params = {}
- params.update(self.options)
- params['page'] = page
- self.req = requests.get(self.url, params=params)
- self.res = self.req.json()
-
- def __iter__(self):
- if not self.req:
- self.__initialize()
- while "response" in self.res and "list" in self.res.get("response") and self.res.get("response").get("list"):
- for item in self.res.get("response").get("list"):
- yield item
- self.__next_page()
-
- def total(self):
- if not self.res:
- return 0
- else:
- return self.res.get("response",{}).get("total",0)
-
-
-
-def get_options():
-
- usage = "usage: %(prog)s [options] <connection_str_or_filepath>"
-
- parser = argparse.ArgumentParser(usage=usage)
-
- parser.add_argument(dest="conn_str",
- help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
- parser.add_argument("-Q", dest="query",
- help="query", metavar="QUERY")
- parser.add_argument("-k", "--key", dest="consumer_key",
- help="Twitter consumer key", metavar="CONSUMER_KEY")
- parser.add_argument("-s", "--secret", dest="consumer_secret",
- help="Twitter consumer secret", metavar="CONSUMER_SECRET")
- parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
- help="Token file name")
- parser.add_argument("-T", dest="topsy_apikey", metavar="TOPSY_APIKEY", default=None,
- help="Topsy apikey")
-
- utils.set_logging_options(parser)
-
- return parser.parse_args()
-
-
-
-if __name__ == "__main__":
-
- options = get_options()
-
- utils.set_logging(options);
-
-
- acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
-
- t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True)
- t.secure = True
-
- conn_str = options.conn_str.strip()
- if not re.match("^\w+://.+", conn_str):
- conn_str = 'sqlite:///' + conn_str
-
- engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
- session = None
-
-
- topsy_parameters = {
- 'apikey': options.topsy_apikey,
- 'perpage': 100,
- 'window': 'a',
- 'type': 'tweet',
- 'hidden': True,
- }
-
- term = Terminal()
-
- try:
- session = Session()
-
- results = None
- page = 1
- print options.query
-
- tr = TopsyResource(options.query, **topsy_parameters)
-
- move_up = 0
-
- for i,item in enumerate(tr):
- # get id
- url = item.get("url")
- tweet_id = url.split("/")[-1]
-
- if move_up > 0:
- print((move_up+1)*term.move_up())
- move_up = 0
-
- print ("%d/%d:%03d%% - %s - %r" % (i+1, tr.total(), int(float(i+1)/float(tr.total())*100.0), tweet_id, item.get("content") ) + term.clear_eol())
- move_up += 1
-
- count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()
-
- if count_tweet:
- continue
- try:
- tweet = t.statuses.show(id=tweet_id, include_entities=True)
- except twitter.api.TwitterHTTPError as e:
- if e.e.code == 404 or e.e.code == 403:
- continue
- else:
- raise
-
- processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
- processor.process()
- session.flush()
- session.commit()
-
- print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol())
- move_up += 1
- rate_limit_limit = int(tweet.headers.getheader('X-Rate-Limit-Limit'))
- rate_limit_remaining = int(tweet.rate_limit_remaining)
-
- if rate_limit_remaining < rate_limit_limit:
- time_to_sleep = 0
- else:
- time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining))
-
- for i in xrange(time_to_sleep):
- if i:
- print(2*term.move_up())
- else:
- move_up += 1
- print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep-i)) + term.clear_eol())
- time.sleep(1)
-
- except twitter.api.TwitterHTTPError as e:
- fmt = ("." + e.format) if e.format else ""
- print "Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data))
-
- finally:
- if session:
- session.close()