"""Search tweets through the Twitter search API and store them in a database.

Source path: script/utils/search_twitter_api.py

The script authenticates with OAuth, pages through the search results for
the query given with ``-Q`` and hands every tweet that is not yet present in
the database to ``TwitterProcessorStatus``.
"""
import argparse
import datetime
import json
import logging
import math
import re
import time
import urllib
# ``import urllib`` alone does not load the ``parse`` submodule; it only
# worked when another imported module happened to load it first.
import urllib.parse

from blessings import Terminal
import requests
import twitter

from iri_tweet import models, utils
from iri_tweet.processor import TwitterProcessorStatus

logger = logging.getLogger(__name__)

APPLICATION_NAME = "Tweet seach json"


# TODO: implement some more parameters
# script to "scrap twitter results"
# Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python
# pyquery cssselect
class TweetManager:
    """Iterate over the tweets returned by a Twitter search, page by page.

    Each page is fetched with ``twitter_con.search.tweets``. The ``max_id``
    used for the following page is read from the ``next_results`` query
    string found in the ``search_metadata`` of the current response.
    """

    def __init__(self, query, twitter_con):
        """Store the search query and the Twitter connection.

        :param query: search query, passed as ``q`` to the search call.
        :param twitter_con: object exposing ``search.tweets(...)``.
        """
        self.query = query
        # ``max_id`` sent with the next request. It becomes negative once a
        # response carries no ``next_results`` entry, which ends iteration.
        self.max_id = 0
        self.t = twitter_con

    def __iter__(self):
        """Yield every tweet of every result page until the pages run out."""
        while self.max_id >= 0:
            response = self.get_json_response()

            # ``next_results`` looks like "?max_id=...&q=..."; drop the
            # leading "?" before parsing. The "?" default parses to an empty
            # dict, so a missing entry gives max_id == -1.
            next_results = response['search_metadata'].get('next_results', "?")[1:]
            next_params = urllib.parse.parse_qs(next_results)
            self.max_id = int(next_params.get('max_id', [-1])[0])

            tweet_list = response['statuses']
            if not tweet_list:
                break

            yield from tweet_list

    def get_json_response(self):
        """Run one search request using the current ``max_id``."""
        return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)


def get_options():
    """Parse the command line and return the resulting namespace.

    The logging related options are added by ``utils.set_logging_options``.
    """
    usage = "usage: %(prog)s [options] "

    parser = argparse.ArgumentParser(usage=usage)

    parser.add_argument(dest="conn_str",
                        help="write tweet to DATABASE. This is a connection string",
                        metavar="CONNECTION_STR")
    parser.add_argument("-Q", dest="query",
                        help="query", metavar="QUERY")
    parser.add_argument("-k", "--key", dest="consumer_key",
                        help="Twitter consumer key", metavar="CONSUMER_KEY")
    parser.add_argument("-s", "--secret", dest="consumer_secret",
                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME",
                        default=".oauth_token", help="Token file name")

    utils.set_logging_options(parser)

    return parser.parse_args()


def main():
    """Run the search and store every tweet not yet present in the database.

    A ``twitter.api.TwitterHTTPError`` raised during the run is reported on
    stdout; the database session is always closed.
    """
    options = get_options()

    utils.set_logging(options)

    access_token_key, access_token_secret = utils.get_oauth_token(
        consumer_key=options.consumer_key,
        consumer_secret=options.consumer_secret,
        token_file_path=options.token_filename,
        application_name=APPLICATION_NAME)

    t = twitter.Twitter(
        domain="api.twitter.com",
        auth=twitter.OAuth(access_token_key, access_token_secret,
                           options.consumer_key, options.consumer_secret),
        secure=True)
    t.secure = True

    # A bare path (no "scheme://" prefix) is treated as a SQLite file.
    conn_str = options.conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(
        conn_str, echo=((options.verbose - options.quiet) > 0), create_all=True)
    session = None

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tm = TweetManager(options.query, t)

        # Number of progress lines printed since the cursor was last moved
        # back up; used to overwrite the previous progress line.
        move_up = 0

        for i, tweet in enumerate(tm):
            # get id
            tweet_id = tweet.get("id")

            if not tweet_id:
                continue

            if move_up > 0:
                print((move_up + 1) * term.move_up())
                move_up = 0

            print("%d: %s - %r" % (i + 1, tweet_id, tweet.get("text", "")) + term.clear_eol())
            move_up += 1

            # Skip tweets that are already stored.
            # NOTE(review): ``tweet_id`` is the value of the "id" key while the
            # filter targets ``id_str`` - confirm the column type matches.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()

            if count_tweet:
                continue

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            session.flush()
            session.commit()

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        if session:
            session.close()


if __name__ == "__main__":
    main()