script/utils/search_twitter_api.py
changeset 1497 14a9bed2e3cd
parent 1496 184372ec27e2
child 1523 53f1b28188f0
--- a/script/utils/search_twitter_api.py	Wed Jan 02 17:49:19 2019 +0100
+++ b/script/utils/search_twitter_api.py	Thu Jan 10 18:36:36 2019 +0100
@@ -1,47 +1,91 @@
 import argparse
+import datetime
+import functools
+import json
 import logging
 import math
 import re
 import time
-import datetime
 import urllib
+from enum import Enum
 
-from blessings import Terminal
 import requests
 import twitter
+from blessings import Terminal
 
 from iri_tweet import models, utils
 from iri_tweet.processor import TwitterProcessorStatus
 
-import json
-
 logger = logging.getLogger(__name__)
 
 APPLICATION_NAME = "Tweet seach json"
 
 
+class SearchType(Enum):
+    standard = 'standard'
+    _30day = '30day'
+    full = 'full'
+
+    def __str__(self):
+        return self.value
+
+def pass_kwargs_as_json(f):
+    def kwargs_json_wrapper(*args, **kwargs):
+        normal_kwargs = { k:v for k,v in kwargs.items() if k[0] != "_" }
+        special_kwargs = { k:v for k,v in kwargs.items() if k[0] == "_" }
+        new_kwargs = { **special_kwargs, '_json': normal_kwargs }
+        return f(*args, **new_kwargs)
+    return kwargs_json_wrapper
+
 # TODO: implement some more parameters
 # script to "scrap twitter results"
 # Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python
 # pyquery cssselect
 class TweetManager:
 
-    def __init__(self, query, twitter_con):
+    def __init__(self, twitter_con, query, search_type, api_env):
         self.query = query
-        self.max_id = 0
+        self.search_type = search_type
+        self.next = ""
         self.t = twitter_con
-        pass
+        self.api_env = api_env
+        self.twitter_api = self.get_twitter_api()
+        self.rate_limit_remaining = 0
+        self.rate_limit_limit = 0
+        self.rate_limit_reset = 0
+        self.i = 0
+
+    def get_twitter_api(self):
+        return {
+            SearchType.standard: lambda t: t.search.tweets,
+            SearchType._30day:   lambda t: pass_kwargs_as_json(functools.partial(getattr(getattr(t.tweets.search,'30day'),self.api_env), _method="POST")),
+            SearchType.full:     lambda t: pass_kwargs_as_json(functools.partial(getattr(t.tweets.search.fullarchive, self.api_env), _method="POST")),
+        }[self.search_type](self.t)
 
     def __iter__(self):
         while True:
-            if self.max_id < 0:
+            if self.next is None:
                 break
-            json = self.get_json_response()
+            self.i = self.i+1
+
+            # with open("json_dump_%s.json" % self.i, 'r') as fp:
+            #     jsondata = json.load(fp)
+            jsondata = self.get_json_response()
 
-            next_results = json['search_metadata'].get('next_results', "?")[1:]
-            self.max_id = int(urllib.parse.parse_qs(next_results).get('max_id', [-1])[0])
+            self.rate_limit_remaining = jsondata.rate_limit_remaining
+            self.rate_limit_limit = jsondata.rate_limit_limit
+            self.rate_limit_reset = jsondata.rate_limit_reset
+
+            with open("json_dump_%s.json" % self.i, 'w') as fp:
+                json.dump(jsondata, fp)
 
-            tweet_list = json['statuses']
+            if self.search_type == SearchType.standard:
+                next_results = jsondata['search_metadata'].get('next_results', "?")[1:]
+                self.next = urllib.parse.parse_qs(next_results).get('max_id', [None])[0]
+                tweet_list = jsondata['statuses']
+            else:
+                self.next = jsondata.get('next')
+                tweet_list = jsondata['results']
 
             if len(tweet_list) == 0:
                 break
@@ -50,8 +94,13 @@
                 yield tweet
 
     def get_json_response(self):
-        return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)
-
+        if self.search_type == SearchType.standard:
+            return self.twitter_api(q=self.query, include_entities=True, max_id=int(self.next) if self.next else 0)
+        else:
+            kwargs = { "query": self.query, "maxResults": 100 }
+            if self.next:
+                kwargs["next"] = self.next
+            return self.twitter_api(**kwargs)
 
 def get_options():
 
@@ -62,31 +111,37 @@
     parser.add_argument(dest="conn_str",
                         help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
     parser.add_argument("-Q", dest="query",
-                      help="query", metavar="QUERY")
+                        help="query", metavar="QUERY")
     parser.add_argument("-k", "--key", dest="consumer_key",
                         help="Twitter consumer key", metavar="CONSUMER_KEY")
     parser.add_argument("-s", "--secret", dest="consumer_secret",
                         help="Twitter consumer secret", metavar="CONSUMER_SECRET")
     parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
-                      help="Token file name")
+                        help="Token file name")
+    parser.add_argument("-a", dest="search_type", metavar="SEARCH_TYPE", default=SearchType.standard, choices=list(SearchType), type=SearchType,
+                        help="Twitter search type ('standard', '30day', 'full')")
+    parser.add_argument("-e", dest="api_env", metavar="API_ENV", default="dev",
+                        help="Twitter api dev environment")
+
 
     utils.set_logging_options(parser)
 
     return parser.parse_args()
 
 
-
 if __name__ == "__main__":
 
     options = get_options()
 
+    print("the search type is : %s" % options.search_type)
+
     utils.set_logging(options)
 
-
-    acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
+    bearer_token = utils.get_oauth2_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
+    twitter_auth = twitter.OAuth2(options.consumer_key, options.consumer_secret, bearer_token)
 
-    t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True)
-    t.secure = True    
+    t = twitter.Twitter(domain="api.twitter.com", auth=twitter_auth, secure=True)
+    t.secure = True
 
     conn_str = options.conn_str.strip()
     if not re.match(r"^\w+://.+", conn_str):
@@ -104,7 +159,7 @@
         results = None
         print(options.query)
 
-        tm = TweetManager(options.query, t)
+        tm = TweetManager(t, options.query, options.search_type, options.api_env)
 
         move_up = 0
 
@@ -127,7 +182,7 @@
             if count_tweet:
                 continue
 
-            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
+            processor = TwitterProcessorStatus(tweet, None, None, session, twitter_auth=twitter_auth, logger=logger)
             processor.process()
             session.flush()
             session.commit()