diff -r efbda157eb57 -r 184372ec27e2 script/utils/search_twitter_json.py --- a/script/utils/search_twitter_json.py Fri Dec 21 12:33:01 2018 +0100 +++ b/script/utils/search_twitter_json.py Wed Jan 02 17:49:19 2019 +0100 @@ -13,7 +13,6 @@ from iri_tweet import models, utils from iri_tweet.processor import TwitterProcessorStatus -from lxml import html import json from pyquery import PyQuery @@ -35,8 +34,6 @@ def __iter__(self): - results = [] - while True: json = self.get_json_response() if len(json['items_html'].strip()) == 0: @@ -51,13 +48,13 @@ for tweetHTML in tweets: tweet_pq = PyQuery(tweetHTML) - username = tweet_pq("span.username.js-action-profile-name b").text(); - txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@')); - retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")); - favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")); - date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time")); - id = tweet_pq.attr("data-tweet-id"); - permalink = tweet_pq.attr("data-permalink-path"); + username = tweet_pq("span.username.js-action-profile-name b").text() + txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@')) + retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")) + favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", "")) + date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time")) + id = tweet_pq.attr("data-tweet-id") + permalink = tweet_pq.attr("data-permalink-path") geo = '' geo_span = tweet_pq('span.Tweet-geo') @@ -129,7 +126,7 @@ options = get_options() - utils.set_logging(options); + utils.set_logging(options) acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME) @@ -138,7 +135,7 @@ t.secure = True conn_str = options.conn_str.strip() - if not re.match("^\w+://.+", conn_str): + if not re.match(r"^\w+://.+", conn_str): conn_str = 'sqlite:///' + conn_str engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True) @@ -151,7 +148,7 @@ session = Session() results = None - print options.query + print(options.query) tm = TweetManager(options.query) @@ -188,9 +185,9 @@ session.flush() session.commit() - print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol()) + print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers['X-Rate-Limit-Limit'])) + term.clear_eol()) move_up += 1 - rate_limit_limit = int(tweet.headers.getheader('X-Rate-Limit-Limit')) + rate_limit_limit = int(tweet.headers['X-Rate-Limit-Limit']) rate_limit_remaining = int(tweet.rate_limit_remaining) if rate_limit_remaining > rate_limit_limit: @@ -198,7 +195,7 @@ else: time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining)) - for i in xrange(time_to_sleep): + for i in range(time_to_sleep): if i: print(2*term.move_up()) else: @@ -208,7 +205,7 @@ except twitter.api.TwitterHTTPError as e: fmt = ("." + e.format) if e.format else "" - print "Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)) + print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data))) finally: if session: