49 break |
46 break |
50 |
47 |
51 for tweetHTML in tweets: |
48 for tweetHTML in tweets: |
52 tweet_pq = PyQuery(tweetHTML) |
49 tweet_pq = PyQuery(tweetHTML) |
53 |
50 |
54 username = tweet_pq("span.username.js-action-profile-name b").text(); |
51 username = tweet_pq("span.username.js-action-profile-name b").text() |
55 txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@')); |
52 txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@')) |
56 retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")); |
53 retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")) |
57 favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")); |
54 favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")) |
58 date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time")); |
55 date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time")) |
59 id = tweet_pq.attr("data-tweet-id"); |
56 id = tweet_pq.attr("data-tweet-id") |
60 permalink = tweet_pq.attr("data-permalink-path"); |
57 permalink = tweet_pq.attr("data-permalink-path") |
61 |
58 |
62 geo = '' |
59 geo = '' |
63 geo_span = tweet_pq('span.Tweet-geo') |
60 geo_span = tweet_pq('span.Tweet-geo') |
64 if len(geo_span) > 0: |
61 if len(geo_span) > 0: |
65 geo = geo_span.attr('title') |
62 geo = geo_span.attr('title') |
127 |
124 |
128 if __name__ == "__main__": |
125 if __name__ == "__main__": |
129 |
126 |
130 options = get_options() |
127 options = get_options() |
131 |
128 |
132 utils.set_logging(options); |
129 utils.set_logging(options) |
133 |
130 |
134 |
131 |
135 acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME) |
132 acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME) |
136 |
133 |
137 t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True) |
134 t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True) |
138 t.secure = True |
135 t.secure = True |
139 |
136 |
140 conn_str = options.conn_str.strip() |
137 conn_str = options.conn_str.strip() |
141 if not re.match("^\w+://.+", conn_str): |
138 if not re.match(r"^\w+://.+", conn_str): |
142 conn_str = 'sqlite:///' + conn_str |
139 conn_str = 'sqlite:///' + conn_str |
143 |
140 |
144 engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True) |
141 engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True) |
145 session = None |
142 session = None |
146 |
143 |
186 processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger) |
183 processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger) |
187 processor.process() |
184 processor.process() |
188 session.flush() |
185 session.flush() |
189 session.commit() |
186 session.commit() |
190 |
187 |
191 print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol()) |
188 print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers['X-Rate-Limit-Limit'])) + term.clear_eol()) |
192 move_up += 1 |
189 move_up += 1 |
193 rate_limit_limit = int(tweet.headers.getheader('X-Rate-Limit-Limit')) |
190 rate_limit_limit = int(tweet.headers['X-Rate-Limit-Limit']) |
194 rate_limit_remaining = int(tweet.rate_limit_remaining) |
191 rate_limit_remaining = int(tweet.rate_limit_remaining) |
195 |
192 |
196 if rate_limit_remaining > rate_limit_limit: |
193 if rate_limit_remaining > rate_limit_limit: |
197 time_to_sleep = 0 |
194 time_to_sleep = 0 |
198 else: |
195 else: |
199 time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining)) |
196 time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining)) |
200 |
197 |
201 for i in xrange(time_to_sleep): |
198 for i in range(time_to_sleep): |
202 if i: |
199 if i: |
203 print(2*term.move_up()) |
200 print(2*term.move_up()) |
204 else: |
201 else: |
205 move_up += 1 |
202 move_up += 1 |
206 print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep-i)) + term.clear_eol()) |
203 print(("Sleeping for %d seconds, %d remaining" % (time_to_sleep, time_to_sleep-i)) + term.clear_eol()) |
207 time.sleep(1) |
204 time.sleep(1) |
208 |
205 |
209 except twitter.api.TwitterHTTPError as e: |
206 except twitter.api.TwitterHTTPError as e: |
210 fmt = ("." + e.format) if e.format else "" |
207 fmt = ("." + e.format) if e.format else "" |
211 print "Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)) |
208 print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data))) |
212 |
209 |
213 finally: |
210 finally: |
214 if session: |
211 if session: |
215 session.close() |
212 session.close() |