script/utils/search_twitter_api.py
changeset 1496 184372ec27e2
child 1497 14a9bed2e3cd
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/script/utils/search_twitter_api.py	Wed Jan 02 17:49:19 2019 +0100
@@ -0,0 +1,141 @@
+import argparse
+import datetime
+import json
+import logging
+import math
+import re
+import time
+import urllib
+import urllib.parse
+
+from blessings import Terminal
+import requests
+import twitter
+
+from iri_tweet import models, utils
+from iri_tweet.processor import TwitterProcessorStatus
+
+# Module-level logger, named after this module.
+logger = logging.getLogger(__name__)
+
+# Application name handed to utils.get_oauth_token() by the script entry point.
+APPLICATION_NAME = "Tweet seach json"
+
+
+# TODO: implement some more parameters
+# script to "scrape twitter results"
+# Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python
+# pyquery cssselect
+class TweetManager:
+    """Iterate over the statuses returned by a Twitter search, page by page.
+
+    Each page is fetched with ``twitter_con.search.tweets(...)``. The
+    ``max_id`` used for the following page is read from the ``next_results``
+    entry of the response's ``search_metadata``.
+
+    The paging position lives in ``self.max_id`` and is not reset when a new
+    iteration starts: once a pass has reached the last page (``max_id`` is
+    -1), iterating the same instance again yields nothing.
+    """
+
+    def __init__(self, query, twitter_con):
+        """Remember the query and the connection used to run it.
+
+        :param query: search string sent as the ``q`` parameter.
+        :param twitter_con: object exposing ``search.tweets(**params)`` and
+            returning a dict-like response.
+        """
+        self.query = query
+        # 0 for the first request; a negative value means "no further page".
+        self.max_id = 0
+        self.t = twitter_con
+
+    def __iter__(self):
+        """Yield every status of every result page, in the order received.
+
+        Iteration stops when a page contains no statuses, or after the page
+        whose metadata carries no ``max_id`` for a following page.
+        """
+        while self.max_id >= 0:
+            response = self.get_json_response()
+
+            # next_results is a query string with one leading character
+            # (dropped here); when it is absent there is no next page and
+            # max_id falls back to -1, which ends the loop after this page.
+            metadata = response.get('search_metadata') or {}
+            next_results = (metadata.get('next_results') or "?")[1:]
+            self.max_id = int(urllib.parse.parse_qs(next_results).get('max_id', [-1])[0])
+
+            tweet_list = response.get('statuses') or []
+            if not tweet_list:
+                break
+
+            for tweet in tweet_list:
+                yield tweet
+
+    def get_json_response(self):
+        """Request one page of results for the query at the current ``max_id``."""
+        return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)
+
+
+def get_options():
+    """Parse the command line and return the resulting argparse namespace.
+
+    Declares one positional argument (``conn_str``) and the ``-Q``,
+    ``-k/--key``, ``-s/--secret`` and ``-t`` options, then lets
+    ``utils.set_logging_options`` add its own options to the same parser
+    before parsing.
+    """
+    # argparse prepends "usage: " to this template by itself, so the template
+    # must not repeat it (otherwise help shows "usage: usage: ...").
+    usage = "%(prog)s [options] <connection_str_or_filepath>"
+
+    parser = argparse.ArgumentParser(usage=usage)
+
+    parser.add_argument(dest="conn_str",
+                        help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
+    parser.add_argument("-Q", dest="query",
+                        help="query", metavar="QUERY")
+    parser.add_argument("-k", "--key", dest="consumer_key",
+                        help="Twitter consumer key", metavar="CONSUMER_KEY")
+    parser.add_argument("-s", "--secret", dest="consumer_secret",
+                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
+    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
+                        help="Token file name")
+
+    utils.set_logging_options(parser)
+
+    return parser.parse_args()
+
+
+
+if __name__ == "__main__":
+    # Script entry point: run the search given with -Q and hand every status
+    # that is not yet in the database to TwitterProcessorStatus.
+
+    options = get_options()
+
+    utils.set_logging(options)
+
+    access_token_key, access_token_secret = utils.get_oauth_token(
+        consumer_key=options.consumer_key,
+        consumer_secret=options.consumer_secret,
+        token_file_path=options.token_filename,
+        application_name=APPLICATION_NAME)
+
+    t = twitter.Twitter(
+        domain="api.twitter.com",
+        auth=twitter.OAuth(access_token_key, access_token_secret,
+                           options.consumer_key, options.consumer_secret),
+        secure=True)
+    t.secure = True
+
+    # A value without a "scheme://" prefix is taken to be a SQLite file path.
+    conn_str = options.conn_str.strip()
+    if not re.match(r"^\w+://.+", conn_str):
+        conn_str = 'sqlite:///' + conn_str
+
+    # options.verbose / options.quiet are presumably declared by
+    # utils.set_logging_options -- confirm there.
+    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
+    session = None
+
+    term = Terminal()
+
+    try:
+        session = Session()
+
+        print(options.query)
+
+        tm = TweetManager(options.query, t)
+
+        move_up = 0
+
+        for i, tweet in enumerate(tm):
+            # Statuses without an id cannot be looked up or stored; skip them.
+            tweet_id = tweet.get("id")
+
+            if not tweet_id:
+                continue
+
+            # Move the cursor back up before printing, so that the progress
+            # line of the previous tweet gets overwritten.
+            if move_up > 0:
+                print((move_up+1)*term.move_up())
+                move_up = 0
+
+            print("%d: %s - %r" % (i+1, tweet_id, tweet.get("text", "")) + term.clear_eol())
+            move_up += 1
+
+            # Compare as a string: id_str is presumably a string column, and
+            # passing the integer id would rely on backend-specific coercion.
+            count_tweet = session.query(models.Tweet).filter_by(id_str=str(tweet_id)).count()
+
+            # Already stored: nothing to do for this status.
+            if count_tweet:
+                continue
+
+            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
+            processor.process()
+            session.flush()
+            session.commit()
+
+    except twitter.api.TwitterHTTPError as e:
+        # Report the failed API call and stop; statuses committed so far stay.
+        fmt = ("." + e.format) if e.format else ""
+        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))
+
+    finally:
+        if session:
+            session.close()