from iri_tweet import models, utils
from sqlalchemy.orm import sessionmaker
import anyjson
import sqlite3
import twitter
import re
import requests
from optparse import OptionParser
import simplejson
import time
from blessings import Terminal
import sys
import math
from symbol import except_clause
# Application name handed to utils.get_oauth_token() when the OAuth token is
# obtained.
APPLICATION_NAME = "Tweet recorder user"
# OAuth consumer credentials used both to obtain the token and to sign the
# Twitter API calls.
# NOTE(review): these credentials are hard-coded in the source file; consider
# loading them from configuration or the environment instead.
CONSUMER_KEY = "Vdr5ZcsjI1G3esTPI8yDg"
CONSUMER_SECRET = "LMhNrY99R6a7E0YbZZkRFpUZpX5EfB1qATbDk1sIVLs"
class TopsyResource(object):
    """Iterable wrapper around a paginated Topsy search query.

    Iterating over an instance yields every entry of ``response.list`` of
    each result page, requesting the following page once the current one is
    exhausted, and stops at the first page that carries no entry.
    """

    def __init__(self, query, **kwargs):
        """Store the request parameters; no request is sent here.

        :param query: search expression, sent as the ``q`` parameter.
        :param kwargs: additional request parameters. ``url``, when given,
            replaces the default search endpoint.
        """
        self.options = kwargs
        self.options['q'] = query
        # NOTE(review): "url" stays in ``self.options`` and is therefore
        # also sent as a request parameter, exactly as before -- confirm
        # whether that is intended.
        self.url = kwargs.get("url", "http://otter.topsy.com/search.json")
        self.page = 0
        self.req = None
        self.res = {}

    @staticmethod
    def _payload(response):
        """Return the decoded JSON body of *response*.

        ``json`` is a plain attribute on old ``requests`` releases and a
        method on current ones; both forms are accepted. A missing body
        (``None``) is normalised to an empty dict so that callers can always
        use ``in`` and ``.get`` on the result.
        """
        body = response.json
        if callable(body):
            body = body()
        return body or {}

    def __fetch(self, page=None):
        """Send one request and store both the response and its payload.

        :param page: page number to ask for; ``None`` leaves the ``page``
            parameter out of the request.
        """
        # Work on a copy so that the page number never leaks into the
        # options shared by every request.
        params = dict(self.options)
        if page is not None:
            params['page'] = page
        self.req = requests.get(self.url, params=params)
        self.res = self._payload(self.req)

    def __initialize(self):
        """Request the first page of results."""
        self.__fetch()

    def __next_page(self):
        """Request the page following the one currently held."""
        self.__fetch(self.res.get("response").get("page") + 1)

    def __iter__(self):
        """Yield the entries of every page, fetching pages on demand."""
        # Compare with None explicitly: a response object may evaluate to
        # False (HTTP error status), which must not trigger a new initial
        # request.
        if self.req is None:
            self.__initialize()
        while True:
            entries = (self.res.get("response") or {}).get("list")
            if not entries:
                return
            for item in entries:
                yield item
            self.__next_page()

    def total(self):
        """Return the ``total`` announced by the last response, 0 if none."""
        if not self.res:
            return 0
        return self.res.get("response", {}).get("total", 0)
def get_option():
    """Parse the command line of the script.

    Declares the database, query, token file and Topsy API key options,
    lets ``utils.set_logging_options`` register its own options on the same
    parser, and returns the ``(options, args)`` pair produced by
    ``OptionParser.parse_args``.
    """
    # (flags, settings) pairs, registered in this order.
    option_specs = (
        (("-d", "--database"),
         dict(dest="database", help="Input database", metavar="DATABASE")),
        (("-Q",),
         dict(dest="query", help="query", metavar="QUERY")),
        (("-t",),
         dict(dest="token_filename", metavar="TOKEN_FILENAME",
              default=".oauth_token", help="Token file name")),
        (("-T",),
         dict(dest="topsy_apikey", metavar="TOPSY_APIKEY",
              default=None, help="Topsy apikey")),
    )

    cli = OptionParser()
    for flags, settings in option_specs:
        cli.add_option(*flags, **settings)
    utils.set_logging_options(cli)

    return cli.parse_args()
def progress_percent(done, total):
    """Return ``done`` out of ``total`` as a whole percentage (truncated).

    A ``total`` of zero or ``None`` gives 0 instead of raising
    ``ZeroDivisionError``.
    """
    if not total:
        return 0
    return int(float(done) / float(total) * 100.0)


def seconds_between_calls(reset_epoch, remaining_calls, now=None):
    """Return the number of whole seconds to wait before the next API call.

    The time left until the rate limit resets is spread evenly over the
    calls that remain, rounded up.

    :param reset_epoch: epoch timestamp at which the rate limit resets.
    :param remaining_calls: calls still allowed before the reset.
    :param now: current epoch timestamp; defaults to ``time.time()``.
    :return: 0 when the reset time has already passed; the whole time left
        when no call remains; otherwise the per-call share of the time left.
    """
    if now is None:
        # time.time() is already an epoch value; time.mktime(time.gmtime())
        # is not, since mktime() interprets its argument as local time.
        now = time.time()
    seconds_left = float(reset_epoch) - now
    if seconds_left <= 0:
        return 0
    if not remaining_calls or remaining_calls < 0:
        return int(math.ceil(seconds_left))
    return int(math.ceil(seconds_left / remaining_calls))


if __name__ == "__main__":
    # Fetch the tweets referenced by a Topsy search from the Twitter API and
    # store those that are not yet in the database.

    (options, args) = get_option()

    utils.set_logging(options)

    if not options.database:
        sys.exit("No database given: use -d/--database")

    access_token_key, access_token_secret = utils.get_oauth_token(options.token_filename, application_name=APPLICATION_NAME, consumer_key=CONSUMER_KEY, consumer_secret=CONSUMER_SECRET)

    t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(access_token_key, access_token_secret, CONSUMER_KEY, CONSUMER_SECRET), secure=True)
    t.secure = True

    # A bare file name is turned into an SQLite connection string.
    conn_str = options.database.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
    session = None

    topsy_parameters = {
        'apikey': options.topsy_apikey,
        'perpage': 100,
        'window': 'a',
        'type': 'tweet',
        'hidden': True,
    }

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tr = TopsyResource(options.query, **topsy_parameters)

        # Number of lines printed since the progress line was last drawn;
        # the next iteration moves the cursor back up by that many lines so
        # that the display is overwritten in place.
        move_up = 0

        for i, item in enumerate(tr):
            # The tweet id is the last path segment of the item URL.
            url = item.get("url")
            tweet_id = url.split("/")[-1]

            if move_up > 0:
                print((move_up+1)*term.move_up())
                move_up = 0

            total = tr.total()
            print("%d/%d:%03d%% - %s - %r" % (i+1, total, progress_percent(i+1, total), tweet_id, item.get("content")) + term.clear_eol())
            move_up += 1

            # Tweets already stored are skipped without calling Twitter.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()
            if count_tweet:
                continue

            try:
                tweet = t.statuses.show(id=tweet_id, include_entities=True)
            except twitter.api.TwitterHTTPError as e:
                # Tweets answered with 404 or 403 are skipped; any other
                # error aborts the run.
                if e.e.code == 404 or e.e.code == 403:
                    continue
                else:
                    raise

            processor = utils.TwitterProcessor(tweet, None, None, session, None, options.token_filename)
            processor.process()
            session.flush()
            session.commit()

            time_to_sleep = seconds_between_calls(tweet.rate_limit_reset, tweet.rate_limit_remaining)

            print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('x-ratelimit-limit'))) + term.clear_eol())
            move_up += 1

            for elapsed in range(time_to_sleep):
                if elapsed:
                    # Redraw the countdown over its previous line.
                    print(2*term.move_up())
                else:
                    move_up += 1
                print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep-elapsed)) + term.clear_eol())
                time.sleep(1)

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        if session:
            session.close()