1 import argparse |
|
2 import logging |
|
3 import math |
|
4 import re |
|
5 import time |
|
6 |
|
7 from blessings import Terminal |
|
8 import requests |
|
9 import twitter |
|
10 |
|
11 from iri_tweet import models, utils |
|
12 from iri_tweet.processor import TwitterProcessorStatus |
|
13 |
|
14 |
|
# Module-level logger; the script body hands it to TwitterProcessorStatus.
logger = logging.getLogger(__name__)

# Application name passed to utils.get_oauth_token when the OAuth token is
# obtained.
APPLICATION_NAME = "Tweet recorder user"
|
18 |
|
19 |
|
class TopsyResource(object):
    """Iterable wrapper around a paginated Topsy search.

    Iterating over an instance yields every entry of ``response.list`` from
    the current result page, then requests the following page, and stops as
    soon as a page comes back without entries.  The first request is sent
    lazily, when iteration starts.

    The object keeps a single cursor (``self.res``): once iteration has run
    to the end, iterating again yields nothing.
    """

    def __init__(self, query, **kwargs):
        """Store the search parameters; no request is sent here.

        :param query: search expression, sent as the ``q`` parameter.
        :param kwargs: additional request parameters.  An optional ``url``
            entry selects the endpoint (defaults to the Otter search URL).
        """
        # NOTE(review): a ``url`` keyword also stays in ``self.options`` and
        # is therefore sent as a request parameter -- confirm this is wanted.
        self.options = kwargs
        self.options['q'] = query
        self.url = kwargs.get("url", "http://otter.topsy.com/search.json")
        self.page = 0      # last page number requested/reported (0 = none)
        self.req = None    # last HTTP response object, None before any fetch
        self.res = {}      # decoded JSON payload of the last response

    def __fetch(self, page=None):
        """Send one request and store the response and its decoded JSON."""
        params = dict(self.options)
        if page is not None:
            params['page'] = page
        self.req = requests.get(self.url, params=params)
        self.res = self.req.json()

    def __payload(self):
        """Return the ``response`` mapping of the last reply, or ``{}``."""
        if not isinstance(self.res, dict):
            return {}
        payload = self.res.get("response")
        return payload if isinstance(payload, dict) else {}

    def __initialize(self):
        """Fetch the first result page."""
        self.__fetch()
        reported = self.__payload().get("page")
        # Assumes the first page is numbered 1 when the server does not
        # report a page number -- TODO confirm against the API.
        self.page = reported if reported is not None else 1

    def __next_page(self):
        """Fetch the page following the one currently held in ``self.res``."""
        current = self.__payload().get("page")
        if current is None:
            # The reply carried no page number: rely on our own counter
            # instead of failing on ``None + 1``.
            current = self.page
        self.page = current + 1
        self.__fetch(page=self.page)

    def __iter__(self):
        """Yield result items page after page until an empty page is met."""
        # ``is None`` rather than truthiness: a requests.Response is falsy
        # for 4xx/5xx replies, which used to trigger a second first-page
        # request on every new iteration.
        if self.req is None:
            self.__initialize()
        while True:
            items = self.__payload().get("list")
            if not items:
                break
            for item in items:
                yield item
            self.__next_page()

    def total(self):
        """Return ``response.total`` of the last reply, or 0 if unavailable."""
        if not self.res:
            return 0
        return self.__payload().get("total", 0)
|
59 |
|
60 |
|
61 |
|
def get_options():
    """Build the command-line parser and return the parsed arguments.

    Declares the positional database connection string plus the query,
    Twitter consumer key/secret, token file and Topsy API key options, then
    lets ``utils.set_logging_options`` register its own options on the same
    parser (presumably the verbosity flags -- verify in ``iri_tweet.utils``).

    :returns: the namespace produced by ``ArgumentParser.parse_args()``.
    """
    parser = argparse.ArgumentParser(
        usage="usage: %(prog)s [options] <connection_str_or_filepath>")

    # (option strings, add_argument keywords), in declaration order; the
    # first entry has no option strings and is therefore positional.
    argument_specs = (
        ((), dict(dest="conn_str",
                  help="write tweet to DATABASE. This is a connection string",
                  metavar="CONNECTION_STR")),
        (("-Q",), dict(dest="query",
                       help="query", metavar="QUERY")),
        (("-k", "--key"), dict(dest="consumer_key",
                               help="Twitter consumer key",
                               metavar="CONSUMER_KEY")),
        (("-s", "--secret"), dict(dest="consumer_secret",
                                  help="Twitter consumer secret",
                                  metavar="CONSUMER_SECRET")),
        (("-t",), dict(dest="token_filename", metavar="TOKEN_FILENAME",
                       default=".oauth_token", help="Token file name")),
        (("-T",), dict(dest="topsy_apikey", metavar="TOPSY_APIKEY",
                       default=None, help="Topsy apikey")),
    )
    for option_strings, settings in argument_specs:
        parser.add_argument(*option_strings, **settings)

    utils.set_logging_options(parser)

    return parser.parse_args()
|
84 |
|
85 |
|
86 |
|
def normalize_conn_str(conn_str):
    """Return *conn_str* stripped, as a database URL.

    A value that does not look like ``scheme://...`` is treated as a file
    path and prefixed with ``sqlite:///``.
    """
    conn_str = conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str
    return conn_str


def progress_percent(done, total):
    """Return ``done / total`` as a truncated integer percentage.

    Returns 0 when *total* is 0 (or missing) instead of dividing by zero.
    """
    if not total:
        return 0
    return int(float(done) / float(total) * 100.0)


def compute_sleep_seconds(remaining, limit, reset, now):
    """Return how many whole seconds to wait before the next Twitter call.

    :param remaining: calls left in the current rate-limit window.
    :param limit: size of the rate-limit window, in calls.
    :param reset: epoch time (seconds) at which the window resets.
    :param now: current epoch time (seconds).

    When no call is left, wait until the window resets; otherwise the
    original policy is kept unchanged.
    """
    if remaining <= 0:
        # Budget exhausted: the next call would be rejected, so wait for
        # the reset instead of letting the whole run abort.
        return max(0, int(math.ceil(reset - now)))
    if remaining < limit:
        # NOTE(review): as inherited, pacing only happens when
        # remaining >= limit, i.e. almost never -- the comparison looks
        # inverted; confirm the intended policy before changing it.
        return 0
    return max(0, int(math.ceil(float(reset - now) / remaining)))


def main():
    """Store in the database the tweets returned by a Topsy search.

    For every search result the tweet id is taken from the last path
    segment of the item's ``url``; tweets already present in the database
    are skipped, the others are fetched through the Twitter API and handed
    to ``TwitterProcessorStatus``.  Progress is redrawn in place on the
    terminal.
    """
    options = get_options()

    utils.set_logging(options)

    access_token_key, access_token_secret = utils.get_oauth_token(
        consumer_key=options.consumer_key,
        consumer_secret=options.consumer_secret,
        token_file_path=options.token_filename,
        application_name=APPLICATION_NAME)

    t = twitter.Twitter(
        domain="api.twitter.com",
        auth=twitter.OAuth(access_token_key, access_token_secret,
                           options.consumer_key, options.consumer_secret),
        secure=True)
    t.secure = True

    conn_str = normalize_conn_str(options.conn_str)

    engine, metadata, Session = models.setup_database(
        conn_str, echo=((options.verbose - options.quiet) > 0),
        create_all=True)
    session = None

    topsy_parameters = {
        'apikey': options.topsy_apikey,
        'perpage': 100,
        'window': 'a',
        'type': 'tweet',
        'hidden': True,
    }

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tr = TopsyResource(options.query, **topsy_parameters)

        # Number of status lines printed for the previous item; the cursor
        # is moved back up by that much so the display is redrawn in place.
        move_up = 0

        for index, item in enumerate(tr):
            # The tweet id is the last path segment of the item URL.
            url = item.get("url")
            if not url:
                logger.warning("Topsy item without url skipped: %r", item)
                continue
            tweet_id = url.split("/")[-1]

            if move_up > 0:
                print((move_up + 1) * term.move_up())
                move_up = 0

            total = tr.total()
            print("%d/%d:%03d%% - %s - %r" % (index + 1, total, progress_percent(index + 1, total), tweet_id, item.get("content")) + term.clear_eol())
            move_up += 1

            # Skip tweets that are already stored.
            if session.query(models.Tweet).filter_by(id_str=tweet_id).count():
                continue

            try:
                tweet = t.statuses.show(id=tweet_id, include_entities=True)
            except twitter.api.TwitterHTTPError as e:
                # 404/403 replies are skipped so one unavailable tweet does
                # not stop the run; anything else goes to the outer handler.
                if e.e.code in (404, 403):
                    continue
                raise

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            session.flush()
            session.commit()

            limit_header = tweet.headers.getheader('X-Rate-Limit-Limit')
            print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(limit_header)) + term.clear_eol())
            move_up += 1
            rate_limit_limit = int(limit_header)
            rate_limit_remaining = int(tweet.rate_limit_remaining)

            # time.time() is the epoch "now"; time.mktime(time.gmtime()) is
            # off by the local UTC offset.
            time_to_sleep = compute_sleep_seconds(
                rate_limit_remaining, rate_limit_limit,
                tweet.rate_limit_reset, time.time())

            for elapsed in range(time_to_sleep):
                if elapsed:
                    # Overwrite the previous countdown line.
                    print(2 * term.move_up())
                else:
                    move_up += 1
                print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep - elapsed)) + term.clear_eol())
                time.sleep(1)

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        if session is not None:
            session.close()


if __name__ == "__main__":
    main()
|