script/utils/search_twitter_json.py
changeset 1496 184372ec27e2
parent 1334 e1d3c1469691
child 1497 14a9bed2e3cd
--- a/script/utils/search_twitter_json.py	Fri Dec 21 12:33:01 2018 +0100
+++ b/script/utils/search_twitter_json.py	Wed Jan 02 17:49:19 2019 +0100
@@ -13,7 +13,6 @@
 from iri_tweet import models, utils
 from iri_tweet.processor import TwitterProcessorStatus
 
-from lxml import html
 import json
 from pyquery import PyQuery
 
@@ -35,8 +34,6 @@
 
     def __iter__(self):
 
-        results = []
-
         while True:
             json = self.get_json_response()
             if len(json['items_html'].strip()) == 0:
@@ -51,13 +48,13 @@
             for tweetHTML in tweets:
                 tweet_pq = PyQuery(tweetHTML)
 
-                username = tweet_pq("span.username.js-action-profile-name b").text();
-                txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@'));
-                retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
-                favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""));
-                date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time"));
-                id = tweet_pq.attr("data-tweet-id");
-                permalink = tweet_pq.attr("data-permalink-path");
+                username = tweet_pq("span.username.js-action-profile-name b").text()
+                txt = re.sub(r"\s+", " ", re.sub(r"[^\x00-\x7F]", "", tweet_pq("p.js-tweet-text").text()).replace('# ', '#').replace('@ ', '@'))
+                retweets = int(tweet_pq("span.ProfileTweet-action--retweet span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
+                favorites = int(tweet_pq("span.ProfileTweet-action--favorite span.ProfileTweet-actionCount").attr("data-tweet-stat-count").replace(",", ""))
+                date_sec = int(tweet_pq("small.time span.js-short-timestamp").attr("data-time"))
+                id = tweet_pq.attr("data-tweet-id")
+                permalink = tweet_pq.attr("data-permalink-path")
 
                 geo = ''
                 geo_span = tweet_pq('span.Tweet-geo')
@@ -129,7 +126,7 @@
 
     options = get_options()
 
-    utils.set_logging(options);
+    utils.set_logging(options)
 
 
     acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
@@ -138,7 +135,7 @@
     t.secure = True
 
     conn_str = options.conn_str.strip()
-    if not re.match("^\w+://.+", conn_str):
+    if not re.match(r"^\w+://.+", conn_str):
         conn_str = 'sqlite:///' + conn_str
 
     engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
@@ -151,7 +148,7 @@
         session = Session()
 
         results = None
-        print options.query
+        print(options.query)
 
         tm = TweetManager(options.query)
 
@@ -188,9 +185,9 @@
             session.flush()
             session.commit()
 
-            print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers.getheader('X-Rate-Limit-Limit'))) + term.clear_eol())
+            print("rate limit remaining %s of %s" % (str(tweet.rate_limit_remaining), str(tweet.headers['X-Rate-Limit-Limit'])) + term.clear_eol())
             move_up += 1
-            rate_limit_limit = int(tweet.headers.getheader('X-Rate-Limit-Limit'))
+            rate_limit_limit = int(tweet.headers['X-Rate-Limit-Limit'])
             rate_limit_remaining = int(tweet.rate_limit_remaining)
 
             if rate_limit_remaining > rate_limit_limit:
@@ -198,7 +195,7 @@
             else:
                 time_to_sleep = int(math.ceil((tweet.rate_limit_reset - time.mktime(time.gmtime())) / tweet.rate_limit_remaining))
 
-            for i in xrange(time_to_sleep):
+            for i in range(time_to_sleep):
                 if i:
                     print(2*term.move_up())
                 else:
@@ -208,7 +205,7 @@
 
     except twitter.api.TwitterHTTPError as e:
         fmt = ("." + e.format) if e.format else ""
-        print "Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data))
+        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))
 
     finally:
         if session: