script/utils/search_twitter_api.py
author ymh <ymh.work@gmail.com>
Wed, 02 Jan 2019 17:49:19 +0100
changeset 1496 184372ec27e2
child 1497 14a9bed2e3cd
permissions -rw-r--r--
upgrade to python 3 and twitter api

import argparse
import logging
import math
import re
import time
import datetime
import urllib

from blessings import Terminal
import requests
import twitter

from iri_tweet import models, utils
from iri_tweet.processor import TwitterProcessorStatus

import json

# Module-level logger; the main block hands it to TwitterProcessorStatus.
logger = logging.getLogger(__name__)

# Application name passed to utils.get_oauth_token() by the main block.
# NOTE(review): "seach" looks like a typo for "search"; it is left as-is because
# the string is used at runtime — confirm nothing depends on it before fixing.
APPLICATION_NAME = "Tweet seach json"


# TODO: implement some more parameters
# script to "scrape twitter results"
# Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python
# pyquery cssselect
class TweetManager:
    """Iterable over the tweets returned by a Twitter search, page after page.

    Each page is fetched with ``twitter_con.search.tweets(...)``. The
    ``max_id`` found in the ``next_results`` entry of the response's
    ``search_metadata`` is used as the cursor for the following request.

    Attributes:
        query: the search query sent as the ``q`` parameter.
        max_id: paging cursor. ``0`` before the first request, then the
            ``max_id`` of the next page, and ``-1`` once a response carries
            no ``next_results``.
        t: the connection object; only ``t.search.tweets`` is called.
    """

    def __init__(self, query, twitter_con):
        """Store the search query and the connection used to run it."""
        self.query = query
        self.max_id = 0
        self.t = twitter_con

    def __iter__(self):
        """Yield every tweet of every result page.

        Iteration stops when a page has no statuses, or after the page
        whose metadata has no ``next_results`` entry. ``self.max_id`` is
        updated as pages are consumed, so the cursor is not reset when the
        object is iterated a second time.
        """
        # Imported explicitly: a bare ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule has been loaded.
        from urllib.parse import parse_qs

        while self.max_id >= 0:
            response = self.get_json_response()

            # ``next_results`` is parsed as a query string once its first
            # character is dropped; the "?" default therefore becomes an
            # empty string, and the cursor falls back to -1.
            next_results = response['search_metadata'].get('next_results', "?")[1:]
            self.max_id = int(parse_qs(next_results).get('max_id', [-1])[0])

            tweet_list = response['statuses']
            if not tweet_list:
                break

            yield from tweet_list

    def get_json_response(self):
        """Fetch one result page for the current ``max_id`` cursor.

        NOTE(review): the first request is sent with ``max_id=0``;
        presumably the API treats that as "no upper bound" — confirm.
        """
        return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)


def get_options():
    """Build the command-line parser and return the parsed arguments.

    The parser takes one positional connection string plus the ``-Q``,
    ``-k/--key``, ``-s/--secret`` and ``-t`` options. It is also passed to
    ``utils.set_logging_options`` before parsing (presumably to register
    the logging-related flags — confirm in ``iri_tweet.utils``).

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``.
    """
    arg_parser = argparse.ArgumentParser(
        usage="usage: %(prog)s [options] <connection_str_or_filepath>"
    )

    # (flags, keyword arguments) for each argument, in declaration order.
    # An empty flags tuple declares the positional argument.
    argument_specs = (
        ((), {
            "dest": "conn_str",
            "help": "write tweet to DATABASE. This is a connection string",
            "metavar": "CONNECTION_STR",
        }),
        (("-Q",), {
            "dest": "query",
            "help": "query",
            "metavar": "QUERY",
        }),
        (("-k", "--key"), {
            "dest": "consumer_key",
            "help": "Twitter consumer key",
            "metavar": "CONSUMER_KEY",
        }),
        (("-s", "--secret"), {
            "dest": "consumer_secret",
            "help": "Twitter consumer secret",
            "metavar": "CONSUMER_SECRET",
        }),
        (("-t",), {
            "dest": "token_filename",
            "metavar": "TOKEN_FILENAME",
            "default": ".oauth_token",
            "help": "Token file name",
        }),
    )
    for flags, settings in argument_specs:
        arg_parser.add_argument(*flags, **settings)

    utils.set_logging_options(arg_parser)

    return arg_parser.parse_args()



if __name__ == "__main__":
    # Script entry point: run the search given with -Q against the Twitter
    # API and hand every tweet that is not already in the database to
    # TwitterProcessorStatus.

    options = get_options()

    utils.set_logging(options)

    access_token_key, access_token_secret = utils.get_oauth_token(
        consumer_key=options.consumer_key,
        consumer_secret=options.consumer_secret,
        token_file_path=options.token_filename,
        application_name=APPLICATION_NAME,
    )

    t = twitter.Twitter(
        domain="api.twitter.com",
        auth=twitter.OAuth(
            access_token_key,
            access_token_secret,
            options.consumer_key,
            options.consumer_secret,
        ),
        secure=True,
    )
    t.secure = True

    # A bare path (no "scheme://" prefix) is treated as a SQLite database file.
    conn_str = options.conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(
        conn_str,
        echo=((options.verbose - options.quiet) > 0),
        create_all=True,
    )
    session = None

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tm = TweetManager(options.query, t)

        # Number of progress lines printed since the cursor was last moved up.
        move_up = 0

        for i, tweet in enumerate(tm):
            # get id
            tweet_id = tweet.get("id")

            if not tweet_id:
                continue

            # Move the cursor back up so the progress line printed below
            # overwrites the previous one instead of scrolling.
            if move_up > 0:
                print((move_up + 1) * term.move_up())
                move_up = 0

            print("%d: %s - %r" % (i + 1, tweet_id, tweet.get("text", "")) + term.clear_eol())
            move_up += 1

            # Skip tweets that are already stored.
            # NOTE(review): tweet_id comes from the "id" field of the tweet
            # but is compared with the id_str column; confirm against
            # models.Tweet that the column type matches.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()

            if count_tweet:
                continue

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            # Commit after each tweet.
            session.flush()
            session.commit()

    except twitter.api.TwitterHTTPError as e:
        # Report the HTTP error returned by Twitter; the script then ends
        # after the cleanup below.
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        # session is still None if Session() itself failed.
        if session:
            session.close()