|
1 import argparse |
|
2 import logging |
|
3 import math |
|
4 import re |
|
5 import time |
|
6 import datetime |
|
7 import urllib |
|
8 |
|
9 from blessings import Terminal |
|
10 import requests |
|
11 import twitter |
|
12 |
|
13 from iri_tweet import models, utils |
|
14 from iri_tweet.processor import TwitterProcessorStatus |
|
15 |
|
16 import json |
|
17 |
|
18 logger = logging.getLogger(__name__) |
|
19 |
|
20 APPLICATION_NAME = "Tweet seach json" |
|
21 |
|
22 |
|
23 # TODO: implement some more parameters |
|
24 # script to "scrape twitter results" |
|
25 # Shamelessly taken from https://github.com/Jefferson-Henrique/GetOldTweets-python |
|
26 # pyquery cssselect |
|
class TweetManager:
    """Iterate over the statuses returned by a paginated tweet search.

    The object wraps a connection exposing ``search.tweets(...)`` and keeps
    the ``max_id`` cursor for the next page on the instance. The cursor is
    never reset: once it has gone negative, iterating again yields nothing.
    """

    def __init__(self, query, twitter_con):
        """Remember the search query and the connection used to run it.

        :param query: value sent as ``q`` to ``search.tweets``.
        :param twitter_con: object exposing ``search.tweets(**params)``.
        """
        self.query = query
        # 0 is sent with the first request; a negative value ends iteration.
        self.max_id = 0
        self.t = twitter_con

    def __iter__(self):
        """Yield every status, fetching result pages until none are left.

        Iteration stops when a page has no statuses, or after the last page
        whose metadata carries no ``max_id`` in ``next_results``.
        """
        # Imported here because the file-level ``import urllib`` alone does
        # not guarantee that the ``urllib.parse`` submodule is loaded.
        from urllib.parse import parse_qs

        while self.max_id >= 0:
            # Named ``response`` so the ``json`` module is not shadowed.
            response = self.get_json_response()

            # ``next_results`` is a query string with a leading "?"; the "?"
            # default makes the slice empty when the key is absent.
            next_results = response['search_metadata'].get('next_results', "?")[1:]
            # No ``max_id`` in the query string -> -1, which ends the loop
            # once the current page has been yielded.
            self.max_id = int(parse_qs(next_results).get('max_id', [-1])[0])

            tweet_list = response['statuses']
            if not tweet_list:
                break

            yield from tweet_list

    def get_json_response(self):
        """Run one search request using the current ``max_id`` cursor.

        :return: whatever ``search.tweets`` returns; ``__iter__`` reads its
            ``search_metadata`` and ``statuses`` keys.
        """
        return self.t.search.tweets(q=self.query, include_entities=True, max_id=self.max_id)
|
54 |
|
55 |
|
def get_options():
    """Parse the command line and return the resulting namespace.

    The parser takes one positional connection string (``conn_str``) plus
    the ``-Q`` query, the consumer key/secret and the token file name.
    ``utils.set_logging_options`` is given the parser before the arguments
    are parsed.

    :return: the namespace produced by ``parser.parse_args()``.
    """
    # argparse prepends "usage: " on its own, so the template must not
    # repeat it (otherwise the help reads "usage: usage: ...").
    usage = "%(prog)s [options] <connection_str_or_filepath>"

    parser = argparse.ArgumentParser(usage=usage)

    parser.add_argument(dest="conn_str",
                        help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
    parser.add_argument("-Q", dest="query",
                        help="query", metavar="QUERY")
    parser.add_argument("-k", "--key", dest="consumer_key",
                        help="Twitter consumer key", metavar="CONSUMER_KEY")
    parser.add_argument("-s", "--secret", dest="consumer_secret",
                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
    parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                        help="Token file name")

    utils.set_logging_options(parser)

    return parser.parse_args()
|
76 |
|
77 |
|
78 |
|
# Script entry point: authenticate, open the database, then store every tweet
# returned by the search that is not already present.
if __name__ == "__main__":

    options = get_options()

    utils.set_logging(options)

    access_token_key, access_token_secret = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename, application_name=APPLICATION_NAME)

    t = twitter.Twitter(domain="api.twitter.com", auth=twitter.OAuth(access_token_key, access_token_secret, options.consumer_key, options.consumer_secret), secure=True)
    t.secure = True

    # A bare value with no "scheme://" prefix is treated as an SQLite path.
    conn_str = options.conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all=True)
    # Stays None until the session is created, so ``finally`` can tell
    # whether there is anything to close.
    session = None

    term = Terminal()

    try:
        session = Session()

        print(options.query)

        tm = TweetManager(options.query, t)

        # Number of progress lines printed since the cursor was last moved up.
        move_up = 0

        for i, tweet in enumerate(tm):
            tweet_id = tweet.get("id")

            if not tweet_id:
                continue

            # Move the cursor back up so the next progress line overwrites
            # the previous one.
            if move_up > 0:
                print((move_up+1)*term.move_up())
                move_up = 0

            print("%d: %s - %r" % (i+1, tweet_id, tweet.get("text", "")) + term.clear_eol())
            move_up += 1

            # Skip tweets that already have a row in the database.
            count_tweet = session.query(models.Tweet).filter_by(id_str=tweet_id).count()

            if count_tweet:
                continue

            processor = TwitterProcessorStatus(tweet, None, None, session, None, options.token_filename, logger)
            processor.process()
            session.flush()
            session.commit()

    except twitter.api.TwitterHTTPError as e:
        fmt = ("." + e.format) if e.format else ""
        print("Twitter sent status %s for URL: %s%s using parameters: (%s)\ndetails: %s" % (repr(e.e.code), repr(e.uri), repr(fmt), repr(e.uriparts), repr(e.response_data)))

    finally:
        if session:
            session.close()