Remove the dependency on Zend 1. Use Composer to reduce the number of dependencies.
import argparse
import logging
import math
import re
import time
from blessings import Terminal
import requests
import twitter
from iri_tweet import models, utils
from iri_tweet.processor import TwitterProcessorStatus
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
# Application name passed to utils.get_oauth_token() when the OAuth token is obtained.
APPLICATION_NAME = "Tweet recorder user"
class TopsyResource(object):
    """Iterable over the items of a paged JSON search endpoint.

    The endpoint defaults to the Topsy Otter search URL. Iterating the
    object issues HTTP GET requests lazily: the first page is fetched when
    iteration starts, and the following page is fetched each time the
    current page's ``response.list`` has been exhausted. Iteration stops
    on the first page whose ``response.list`` is missing or empty.
    """

    DEFAULT_URL = "http://otter.topsy.com/search.json"

    def __init__(self, query, **kwargs):
        """Store the search parameters; no request is made here.

        query  -- search expression, sent as the ``q`` parameter.
        kwargs -- extra query-string parameters. The special keyword
                  ``url`` overrides the endpoint and is NOT forwarded as
                  a query parameter.
        """
        # Pop the endpoint out of the parameters so that it is not sent
        # back to the server as a bogus "url" query argument.
        self.url = kwargs.pop("url", self.DEFAULT_URL)
        self.options = kwargs
        self.options['q'] = query
        # Kept for backward compatibility; this class never updates it.
        self.page = 0
        self.req = None   # last HTTP response object, None before the first request
        self.res = {}     # decoded JSON body of the last response

    def __request(self, page=None):
        """Fetch one page and store both the response and its JSON body."""
        params = dict(self.options)
        if page is not None:
            params['page'] = page
        self.req = requests.get(self.url, params=params)
        self.res = self.req.json()

    def __initialize(self):
        """Fetch the first page (no explicit ``page`` parameter)."""
        self.__request()

    def __next_page(self):
        """Fetch the page following the one reported by the last response."""
        page = self.res.get("response").get("page") + 1
        self.__request(page)

    def __iter__(self):
        """Yield every item of every page, fetching pages on demand."""
        # Compare against None explicitly: a requests.Response is falsy
        # when its status is an error, and must not trigger a silent
        # second "first" request.
        if self.req is None:
            self.__initialize()
        while True:
            items = (self.res.get("response") or {}).get("list")
            if not items:
                break
            for item in items:
                yield item
            self.__next_page()

    def total(self):
        """Return ``response.total`` of the last page fetched, or 0 if none."""
        if not self.res:
            return 0
        return self.res.get("response", {}).get("total", 0)
def get_options(args=None):
    """Parse the command-line options of the script.

    args -- optional list of argument strings; when None (the default)
            argparse reads them from ``sys.argv[1:]``.

    Returns the ``argparse.Namespace`` holding ``conn_str``, ``query``,
    ``consumer_key``, ``consumer_secret``, ``token_filename`` and
    ``topsy_apikey``, plus whatever ``utils.set_logging_options`` registers
    (presumably the verbose/quiet switches read by the main script --
    confirm in iri_tweet.utils).
    """
    # argparse prefixes the usage text with "usage: " on its own, so the
    # template must not repeat it (it used to print "usage: usage: ...").
    usage = "%(prog)s [options] <connection_str_or_filepath>"

    parser = argparse.ArgumentParser(usage=usage)

    parser.add_argument(dest="conn_str",
                        help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
    parser.add_argument("-Q", dest="query",
                        help="query", metavar="QUERY")
    parser.add_argument("-k", "--key", dest="consumer_key",
                        help="Twitter consumer key", metavar="CONSUMER_KEY")
    parser.add_argument("-s", "--secret", dest="consumer_secret",
                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                        help="Token file name")
    parser.add_argument("-T", dest="topsy_apikey", metavar="TOPSY_APIKEY", default=None,
                        help="Topsy apikey")

    utils.set_logging_options(parser)

    return parser.parse_args(args)
def progress_percent(done, total):
    """Return ``done / total`` as a truncated integer percentage.

    Returns 0 when ``total`` is zero or missing, so that a result page
    without a usable total cannot abort the run with ZeroDivisionError.
    """
    if not total:
        return 0
    return int(float(done) / float(total) * 100.0)


if __name__ == "__main__":

    options = get_options()

    utils.set_logging(options)

    access_token_key, access_token_secret = utils.get_oauth_token(
        consumer_key=options.consumer_key,
        consumer_secret=options.consumer_secret,
        token_file_path=options.token_filename,
        application_name=APPLICATION_NAME)

    t = twitter.Twitter(
        domain="api.twitter.com",
        auth=twitter.OAuth(access_token_key, access_token_secret,
                           options.consumer_key, options.consumer_secret),
        secure=True)
    t.secure = True

    # A bare path (no "scheme://" prefix) is treated as an SQLite file.
    conn_str = options.conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(
        conn_str, echo=((options.verbose - options.quiet) > 0), create_all=True)
    session = None

    topsy_parameters = {
        'apikey': options.topsy_apikey,
        'perpage': 100,
        'window': 'a',
        'type': 'tweet',
        'hidden': True,
    }

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tr = TopsyResource(options.query, **topsy_parameters)

        # Number of status lines printed for the previous item; the cursor
        # is moved back up by that many lines so they get overwritten.
        move_up = 0

        for i, item in enumerate(tr):
            # The tweet id is the last path segment of the item's URL.
            url = item.get("url")
            tweet_id = url.split("/")[-1]

            if move_up > 0:
                print((move_up + 1) * term.move_up())
                move_up = 0

            total = tr.total()
            print("%d/%d:%03d%% - %s - %r" % (i + 1, total, progress_percent(i + 1, total), tweet_id, item.get("content")) + term.clear_eol())
            move_up += 1

            # Skip tweets that are already stored.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()
            if count_tweet:
                continue

            try:
                tweet = t.statuses.show(id=tweet_id, include_entities=True)
            except twitter.api.TwitterHTTPError as e:
                # 404/403 responses are skipped; anything else is fatal.
                if e.e.code == 404 or e.e.code == 403:
                    continue
                else:
                    raise

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            session.flush()
            session.commit()

            limit_header = tweet.headers.getheader('X-Rate-Limit-Limit')
            print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(limit_header)) + term.clear_eol())
            move_up += 1

            rate_limit_limit = int(limit_header)
            rate_limit_remaining = int(tweet.rate_limit_remaining)

            # NOTE(review): sleeping only happens when remaining >= limit,
            # which looks inverted for a throttle (it is almost never true
            # right after a call). Condition left unchanged -- confirm intent.
            if rate_limit_remaining < rate_limit_limit:
                time_to_sleep = 0
            else:
                # time.time() is the current epoch in UTC; the former
                # time.mktime(time.gmtime()) was off by the local UTC offset.
                seconds_to_reset = tweet.rate_limit_reset - time.time()
                # max(..., 1) avoids dividing by zero when no call is left.
                time_to_sleep = int(math.ceil(seconds_to_reset / max(rate_limit_remaining, 1)))

            # Count down one second at a time, redrawing the same line.
            for elapsed in xrange(time_to_sleep):
                if elapsed:
                    print(2 * term.move_up())
                else:
                    move_up += 1
                print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep - elapsed)) + term.clear_eol())
                time.sleep(1)

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        if session:
            session.close()