--- a/script/utils/search_twitter_api.py Wed Jan 02 17:49:19 2019 +0100
+++ b/script/utils/search_twitter_api.py Thu Jan 10 18:36:36 2019 +0100
@@ -1,47 +1,91 @@
import argparse
+import datetime
+import functools
+import json
import logging
import math
import re
import time
-import datetime
import urllib
+from enum import Enum
-from blessings import Terminal
import requests
import twitter
+from blessings import Terminal
from iri_tweet import models, utils
from iri_tweet.processor import TwitterProcessorStatus
-import json
-
logger = logging.getLogger(__name__)
APPLICATION_NAME = "Tweet seach json"
+class SearchType(Enum):
+ standard = 'standard'
+ _30day = '30day'
+ full = 'full'
+
+ def __str__(self):
+ return self.value
+
+def pass_kwargs_as_json(f):
+ def kwargs_json_wrapper(*args, **kwargs):
+ normal_kwargs = { k:v for k,v in kwargs.items() if k[0] != "_" }
+ special_kwargs = { k:v for k,v in kwargs.items() if k[0] == "_" }
+ new_kwargs = { **special_kwargs, '_json': normal_kwargs }
+ return f(*args, **new_kwargs)
+ return kwargs_json_wrapper
+
# TODO: implement some more parameters
# script to "scrap twitter results"
# Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python
# pyquery cssselect
class TweetManager:
- def __init__(self, query, twitter_con):
+ def __init__(self, twitter_con, query, search_type, api_env):
self.query = query
- self.max_id = 0
+ self.search_type = search_type
+ self.next = ""
self.t = twitter_con
- pass
+ self.api_env = api_env
+ self.twitter_api = self.get_twitter_api()
+ self.rate_limit_remaining = 0
+ self.rate_limit_limit = 0
+ self.rate_limit_reset = 0
+ self.i = 0
+
+ def get_twitter_api(self):
+ return {
+ SearchType.standard: lambda t: t.search.tweets,
+ SearchType._30day: lambda t: pass_kwargs_as_json(functools.partial(getattr(getattr(t.tweets.search,'30day'),self.api_env), _method="POST")),
+ SearchType.full: lambda t: pass_kwargs_as_json(functools.partial(getattr(t.tweets.search.fullarchive, self.api_env), _method="POST")),
+ }[self.search_type](self.t)
def __iter__(self):
while True:
- if self.max_id < 0:
+ if self.next is None:
break
- json = self.get_json_response()
+ self.i = self.i+1
+
+ # with open("json_dump_%s.json" % self.i, 'r') as fp:
+ # jsondata = json.load(fp)
+ jsondata = self.get_json_response()
- next_results = json['search_metadata'].get('next_results', "?")[1:]
- self.max_id = int(urllib.parse.parse_qs(next_results).get('max_id', [-1])[0])
+ self.rate_limit_remaining = jsondata.rate_limit_remaining
+ self.rate_limit_limit = jsondata.rate_limit_limit
+ self.rate_limit_reset = jsondata.rate_limit_reset
+
+ with open("json_dump_%s.json" % self.i, 'w') as fp:
+ json.dump(jsondata, fp)
- tweet_list = json['statuses']
+ if self.search_type == SearchType.standard:
+ next_results = jsondata['search_metadata'].get('next_results', "?")[1:]
+ self.next = urllib.parse.parse_qs(next_results).get('max_id', [None])[0]
+ tweet_list = jsondata['statuses']
+ else:
+ self.next = jsondata.get('next')
+ tweet_list = jsondata['results']
if len(tweet_list) == 0:
break
@@ -50,8 +94,13 @@
yield tweet
def get_json_response(self):
- return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)
-
+ if self.search_type == SearchType.standard:
+ return self.twitter_api(q=self.query, include_entities=True, max_id=int(self.next) if self.next else 0)
+ else:
+ kwargs = { "query": self.query, "maxResults": 100 }
+ if self.next:
+ kwargs["next"] = self.next
+ return self.twitter_api(**kwargs)
def get_options():
@@ -62,31 +111,37 @@
parser.add_argument(dest="conn_str",
help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
parser.add_argument("-Q", dest="query",
- help="query", metavar="QUERY")
+ help="query", metavar="QUERY")
parser.add_argument("-k", "--key", dest="consumer_key",
help="Twitter consumer key", metavar="CONSUMER_KEY")
parser.add_argument("-s", "--secret", dest="consumer_secret",
help="Twitter consumer secret", metavar="CONSUMER_SECRET")
parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
- help="Token file name")
+ help="Token file name")
+ parser.add_argument("-a", dest="search_type", metavar="SEARCH_TYPE", default=SearchType.standard, choices=list(SearchType), type=SearchType,
+                        help="Twitter search type ('standard', '30day', 'full')")
+ parser.add_argument("-e", dest="api_env", metavar="API_ENV", default="dev",
+ help="Twitter api dev environment")
+
utils.set_logging_options(parser)
return parser.parse_args()
-
if __name__ == "__main__":
options = get_options()
+ print("the search type is : %s" % options.search_type)
+
utils.set_logging(options)
-
- acess_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
+ bearer_token = utils.get_oauth2_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)
+ twitter_auth = twitter.OAuth2(options.consumer_key, options.consumer_secret, bearer_token)
- t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True)
- t.secure = True
+ t = twitter.Twitter(domain="api.twitter.com", auth=twitter_auth, secure=True)
+ t.secure = True
conn_str = options.conn_str.strip()
if not re.match(r"^\w+://.+", conn_str):
@@ -104,7 +159,7 @@
results = None
print(options.query)
- tm = TweetManager(options.query, t)
+ tm = TweetManager(t, options.query, options.search_type, options.api_env)
move_up = 0
@@ -127,7 +182,7 @@
if count_tweet:
continue
- processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
+ processor = TwitterProcessorStatus(tweet, None, None, session, twitter_auth=twitter_auth, logger=logger)
processor.process()
session.flush()
session.commit()