# Update listener. Add support for Twitter regulation messages. Update virtualenv.
from blessings import Terminal
from iri_tweet import models, utils
from iri_tweet.processor import TwitterProcessorStatus
from optparse import OptionParser
import logging
import math
import re
import requests
import time
import twitter
# Module-level logger; the main script below hands it to TwitterProcessorStatus.
logger = logging.getLogger(__name__)
# Application name passed to utils.get_oauth_token() when obtaining the OAuth token.
APPLICATION_NAME = "Tweet recorder user"
# Twitter OAuth consumer credentials, used both to obtain the access token and
# to build the twitter.OAuth object that authenticates API calls.
# NOTE(review): these credentials are hard-coded in source — consider loading
# them from configuration or the environment instead.
CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs"
class TopsyResource(object):
    """Iterable wrapper around a paginated Topsy search request.

    Iterating over an instance yields every item of the ``response.list``
    array of each result page, fetching the following page on demand until
    a page comes back without any item.

    :param query: search expression, sent as the ``q`` request parameter.
    :param kwargs: extra request parameters. ``url`` additionally overrides
        the endpoint (default ``http://otter.topsy.com/search.json``).
    """

    def __init__(self, query, **kwargs):
        self.options = kwargs
        self.options['q'] = query
        # NOTE(review): a caller-supplied ``url`` stays in ``self.options`` and
        # is therefore also sent as a query parameter -- confirm this is wanted.
        self.url = kwargs.get("url", "http://otter.topsy.com/search.json")
        self.page = 0
        self.req = None   # last HTTP response, None until the first fetch
        self.res = {}     # decoded JSON body of the last response

    @staticmethod
    def _json_payload(response):
        """Return the decoded JSON body of *response* as a mapping.

        ``Response.json`` is a plain attribute in old releases of requests and
        a method in newer ones; both forms are accepted. A missing body
        (``None``) is normalised to an empty dict so that membership tests on
        the result never fail.
        """
        payload = response.json
        if callable(payload):
            payload = payload()
        return {} if payload is None else payload

    def __fetch(self, page=None):
        """Run the request (optionally for a given page) and store the result."""
        params = dict(self.options)
        if page is not None:
            params['page'] = page
        self.req = requests.get(self.url, params=params)
        self.res = self._json_payload(self.req)

    def __initialize(self):
        """Fetch the first result page (no explicit ``page`` parameter)."""
        self.__fetch()

    def __next_page(self):
        """Fetch the page following the one reported by the last response."""
        self.__fetch(self.res.get("response").get("page") + 1)

    def __iter__(self):
        # Compare with None explicitly: the truth value of a requests
        # Response reflects its HTTP status, not whether a request was made.
        if self.req is None:
            self.__initialize()
        while True:
            items = (self.res.get("response") or {}).get("list")
            if not items:
                break
            for item in items:
                yield item
            self.__next_page()

    def total(self):
        """Return the ``response.total`` value of the last response, or 0."""
        if not self.res:
            return 0
        return self.res.get("response", {}).get("total", 0)
def get_option():
    """Declare the command-line options of the script and parse the arguments.

    The script's own options (-d/--database, -Q, -t, -T) are registered
    first, then utils.set_logging_options() registers its options on the
    same parser.

    Returns whatever ``OptionParser.parse_args()`` returns.
    """
    option_specs = (
        (("-d", "--database"),
         dict(dest="database", metavar="DATABASE", help="Input database")),
        (("-Q",),
         dict(dest="query", metavar="QUERY", help="query")),
        (("-t",),
         dict(dest="token_filename", metavar="TOKEN_FILENAME",
              default=".oauth_token", help="Token file name")),
        (("-T",),
         dict(dest="topsy_apikey", metavar="TOPSY_APIKEY",
              default=None, help="Topsy apikey")),
    )

    opt_parser = OptionParser()
    for flags, settings in option_specs:
        opt_parser.add_option(*flags, **settings)

    utils.set_logging_options(opt_parser)

    return opt_parser.parse_args()
if __name__ == "__main__":
    # Script entry point: run a Topsy search for the query given with -Q, and
    # for each result whose tweet id is not yet in the database, fetch the
    # tweet from Twitter and hand it to TwitterProcessorStatus, pacing the
    # calls according to the rate-limit information of each Twitter response.

    (options, args) = get_option()
    utils.set_logging(options)

    # Without a database there is nothing to write to; fail with a clear
    # message rather than an AttributeError on None.strip().
    if not options.database:
        raise SystemExit("No database given: use -d/--database")

    # A positional argument may not follow keyword arguments, so the first
    # three arguments are all passed by position.
    # NOTE(review): assumes get_oauth_token takes (consumer_key,
    # consumer_secret, token file path) in that order -- confirm.
    access_token_key, access_token_secret = utils.get_oauth_token(
        CONSUMER_KEY, CONSUMER_SECRET, options.token_filename,
        application_name=APPLICATION_NAME)

    t = twitter.Twitter(
        domain="api.twitter.com",
        auth=twitter.OAuth(access_token_key, access_token_secret,
                           CONSUMER_KEY, CONSUMER_SECRET),
        secure=True)
    t.secure = True

    # A bare path (no "scheme://" prefix) is treated as a SQLite file.
    conn_str = options.database.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(
        conn_str, echo=((options.verbose - options.quiet) > 0), create_all=True)
    session = None

    topsy_parameters = {
        'apikey': options.topsy_apikey,
        'perpage': 100,
        'window': 'a',
        'type': 'tweet',
        'hidden': True,
    }

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tr = TopsyResource(options.query, **topsy_parameters)

        # Number of status lines printed since the last progress line; used to
        # move the cursor back up so that the display is redrawn in place.
        move_up = 0

        for i, item in enumerate(tr):
            # The tweet id is the last path segment of the result URL.
            url = item.get("url")
            tweet_id = url.split("/")[-1]

            if move_up > 0:
                print((move_up + 1) * term.move_up())
                move_up = 0

            # Guard the percentage against a total of 0.
            total = tr.total()
            percent = int(float(i + 1) / float(total) * 100.0) if total else 0
            print("%d/%d:%03d%% - %s - %r" % (i + 1, total, percent, tweet_id, item.get("content")) + term.clear_eol())
            move_up += 1

            # Skip tweets that are already stored.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()
            if count_tweet:
                continue

            try:
                tweet = t.statuses.show(id=tweet_id, include_entities=True)
            except twitter.api.TwitterHTTPError as e:
                # 404 / 403 responses are skipped; any other error aborts.
                if e.e.code == 404 or e.e.code == 403:
                    continue
                else:
                    raise

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            session.flush()
            session.commit()

            # Spread the remaining calls over the time left before the rate
            # limit resets. time.time() is used because
            # time.mktime(time.gmtime()) is shifted by the local UTC offset.
            # NOTE(review): assumes rate_limit_reset is a UTC epoch timestamp.
            seconds_to_reset = tweet.rate_limit_reset - time.time()
            remaining_calls = tweet.rate_limit_remaining
            if remaining_calls and remaining_calls > 0:
                time_to_sleep = int(math.ceil(seconds_to_reset / float(remaining_calls)))
            else:
                # No call left: wait for the reset instead of dividing by zero.
                time_to_sleep = int(math.ceil(seconds_to_reset))
            time_to_sleep = max(0, time_to_sleep)

            print(("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('x-ratelimit-limit')))) + term.clear_eol())
            move_up += 1

            for elapsed in xrange(time_to_sleep):
                if elapsed:
                    # Go back up so the countdown line is rewritten in place.
                    print(2 * term.move_up())
                else:
                    move_up += 1
                print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep - elapsed)) + term.clear_eol())
                time.sleep(1)

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        if session:
            session.close()