script/utils/search_twitter_api.py
author ymh <ymh.work@gmail.com>
Wed, 02 Jan 2019 17:49:19 +0100
changeset 1496 184372ec27e2
child 1497 14a9bed2e3cd
permissions -rw-r--r--
upgrade to python 3 and twitter api
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1496
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
import argparse
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
import logging
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
import math
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
import re
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
import time
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
import datetime
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
import urllib
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from blessings import Terminal
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
import requests
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
import twitter
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
from iri_tweet import models, utils
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
from iri_tweet.processor import TwitterProcessorStatus
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
import json
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
# Module-level logger, named after this module.
logger = logging.getLogger(__name__)
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
# Application name handed to utils.get_oauth_token() by the script section.
# NOTE(review): "seach" looks like a typo for "search"; left unchanged here
# because the string is passed along at runtime.
APPLICATION_NAME = "Tweet seach json"
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
# TODO: implement some more parameters
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
# script to "scrape twitter results"
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
# Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
# pyquery cssselect
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
class TweetManager:
    """Iterate over the tweets returned for a search query, page by page.

    Each page is fetched through ``twitter_con.search.tweets``. The
    ``max_id`` used for the following page is read from the ``next_results``
    entry of the response's ``search_metadata``.
    """

    def __init__(self, query, twitter_con):
        """Store the search query and the API connection.

        :param query: search string sent as the ``q`` parameter.
        :param twitter_con: object exposing ``search.tweets(**params)``.
        """
        self.query = query
        # Starts at 0 and is sent as-is with the first request; it becomes
        # negative once a response carries no cursor for a following page.
        self.max_id = 0
        self.t = twitter_con

    def __iter__(self):
        """Yield every entry of ``statuses`` from each fetched page.

        Iteration stops after the tweets of a response without a
        ``next_results`` cursor have been yielded, or as soon as a response
        contains no tweets at all.
        """
        # Imported explicitly: a bare ``import urllib`` does not guarantee
        # that the ``urllib.parse`` submodule has been loaded.
        from urllib.parse import parse_qs

        while self.max_id >= 0:
            response = self.get_json_response()

            # Drop the first character (the "?" in the default value) and
            # parse the remainder as a query string to find the next max_id.
            next_results = response['search_metadata'].get('next_results', "?")[1:]
            self.max_id = int(parse_qs(next_results).get('max_id', [-1])[0])

            tweet_list = response['statuses']
            if not tweet_list:
                break

            yield from tweet_list

    def get_json_response(self):
        """Request one page of results for the query at the current ``max_id``.

        :return: whatever ``self.t.search.tweets`` returns; ``__iter__``
            reads its ``search_metadata`` and ``statuses`` entries.
        """
        return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
def get_options():
    """Build the command-line parser and return the parsed arguments.

    Declares the positional connection string, the search query (``-Q``),
    the Twitter consumer key and secret (``-k``/``-s``) and the OAuth token
    file name (``-t``, default ``.oauth_token``). Any further options are
    registered by ``utils.set_logging_options``.

    :return: the ``argparse.Namespace`` produced by ``parse_args()``.
    """
    # argparse prepends "usage: " on its own, so the custom text must not
    # repeat it (otherwise the help shows "usage: usage: ...").
    usage = "%(prog)s [options] <connection_str_or_filepath>"

    parser = argparse.ArgumentParser(usage=usage)

    parser.add_argument(dest="conn_str",
                        help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
    parser.add_argument("-Q", dest="query",
                        help="query", metavar="QUERY")
    parser.add_argument("-k", "--key", dest="consumer_key",
                        help="Twitter consumer key", metavar="CONSUMER_KEY")
    parser.add_argument("-s", "--secret", dest="consumer_secret",
                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                        help="Token file name")

    utils.set_logging_options(parser)

    return parser.parse_args()
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
184372ec27e2 upgrade to python 3 and twitter api
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
if __name__ == "__main__":
    # Script entry point: authenticate against Twitter, open the database,
    # then store every tweet returned for the query that is not stored yet.

    options = get_options()

    utils.set_logging(options)

    access_token_key, access_token_secret = utils.get_oauth_token(
        consumer_key=options.consumer_key,
        consumer_secret=options.consumer_secret,
        token_file_path=options.token_filename,
        application_name=APPLICATION_NAME)

    t = twitter.Twitter(
        domain="api.twitter.com",
        auth=twitter.OAuth(access_token_key, access_token_secret,
                           options.consumer_key, options.consumer_secret),
        secure=True)
    t.secure = True

    conn_str = options.conn_str.strip()
    # A value without a "scheme://" prefix is turned into a SQLite URL.
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(
        conn_str, echo=((options.verbose - options.quiet) > 0), create_all=True)
    session = None

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tm = TweetManager(options.query, t)

        # Number of progress lines printed since the cursor was last moved
        # back up; used so each new progress line overwrites the previous one.
        move_up = 0

        for i, tweet in enumerate(tm):
            # get id
            tweet_id = tweet.get("id")

            if not tweet_id:
                continue

            if move_up > 0:
                # One extra move compensates for the newline print() appends.
                print((move_up + 1) * term.move_up())
                move_up = 0

            print("%d: %s - %r" % (i + 1, tweet_id, tweet.get("text", "")) + term.clear_eol())
            move_up += 1

            # Skip tweets that are already stored.
            # NOTE(review): the value of "id" is compared against the
            # id_str column - confirm the column type accepts it.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()

            if count_tweet:
                continue

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            session.flush()
            session.commit()

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        # The session is only set once Session() has succeeded.
        if session:
            session.close()