--- a/script/utils/search_twitter_json.py Fri Dec 21 12:33:01 2018 +0100
+++ b/script/utils/search_twitter_json.py Wed Jan 02 17:49:19 2019 +0100
@@ -13,7 +13,6 @@
from iri_tweet import models, utils
from iri_tweet.processor import TwitterProcessorStatus
-from lxml import html
import json
from pyquery import PyQuery
@@ -35,8 +34,6 @@
def __iter__(self):
- results = []
-
while True:
json = self.get_json_response()
if len(json['items_html'].strip()) == 0:
@@ -51,13 +48,13 @@
for tweetHTML in tweets:
tweet_pq = PyQuery(tweetHTML)
- username = tweet_pq("span.username.js-action-profile-name b").text();
- txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@'));
- retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
- favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
- date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time"));
- id = tweet_pq.attr("data-tweet-id");
- permalink = tweet_pq.attr("data-permalink-path");
+ username = tweet_pq("span.username.js-action-profile-name b").text()
+ txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@'))
+ retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
+ favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
+ date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time"))
+ id = tweet_pq.attr("data-tweet-id")
+ permalink = tweet_pq.attr("data-permalink-path")
geo = ''
geo_span = tweet_pq('span.Tweet-geo')
@@ -129,7 +126,7 @@
options = get_options()
- utils.set_logging(options);
+ utils.set_logging(options)
acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
@@ -138,7 +135,7 @@
t.secure = True
conn_str = options.conn_str.strip()
- if not re.match("^\w+://.+", conn_str):
+ if not re.match(r"^\w+://.+", conn_str):
conn_str = 'sqlite:///' + conn_str
engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
@@ -151,7 +148,7 @@
session = Session()
results = None
- print options.query
+ print(options.query)
tm = TweetManager(options.query)
@@ -188,9 +185,9 @@
session.flush()
session.commit()
- print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol())
+ print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers['X-Rate-Limit-Limit'])) + term.clear_eol())
move_up += 1
- rate_limit_limit = int(tweet.headers.getheader('X-Rate-Limit-Limit'))
+ rate_limit_limit = int(tweet.headers['X-Rate-Limit-Limit'])
rate_limit_remaining = int(tweet.rate_limit_remaining)
if rate_limit_remaining > rate_limit_limit:
@@ -198,7 +195,7 @@
else:
time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining))
- for i in xrange(time_to_sleep):
+ for i in range(time_to_sleep):
if i:
print(2*term.move_up())
else:
@@ -208,7 +205,7 @@
except twitter.api.TwitterHTTPError as e:
fmt = ("." + e.format) if e.format else ""
- print "Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data))
+ print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))
finally:
if session: