| author | ymh <ymh.work@gmail.com> |
| Sat, 22 Sep 2018 12:30:48 +0200 | |
| changeset 1452 | 414c7d88133b |
| parent 1166 | d92b7a46e5c1 |
| permissions | -rw-r--r-- |
|
1166
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
1 |
import argparse |
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
2 |
import re |
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
3 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
4 |
import anyjson |
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
5 |
import twitter |
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
6 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
7 |
from iri_tweet import models, processor, utils |
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
8 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
9 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
10 |
def get_options():
    """Parse the command-line arguments of the script.

    One positional argument (``conn_str``) and a set of short options are
    declared here; ``utils.set_logging_options`` is then given the parser so
    it can register its own options (presumably the ``verbose``/``quiet``
    values read by the main block -- confirm in ``iri_tweet.utils``).

    :returns: the namespace produced by ``ArgumentParser.parse_args()``.
    """
    arg_parser = argparse.ArgumentParser(
        usage="usage: %(prog)s [options] <connection_str_or_filepath>")

    # (flags, keyword settings) pairs, kept in declaration order because that
    # order is what the generated help output follows.
    argument_specs = (
        ((), dict(
            dest="conn_str",
            metavar="CONNECTION_STR",
            help="write tweet to DATABASE. This is a connection string")),
        (("-S",), dict(
            dest="screen_name",
            metavar="SCREENNAME",
            default=None,
            help="Screen name")),
        (("-U",), dict(
            dest="user_id",
            metavar="USERID",
            default=None,
            help="User id")),
        (("-P",), dict(
            dest="rpp",
            metavar="RPP",
            default="200",
            help="Result per page")),
        (("-t",), dict(
            dest="token_filename",
            metavar="TOKEN_FILENAME",
            default=".oauth_token",
            help="Token file name")),
        (("-k", "--key"), dict(
            dest="consumer_key",
            metavar="CONSUMER_KEY",
            help="Twitter consumer key")),
        (("-s", "--secret"), dict(
            dest="consumer_secret",
            metavar="CONSUMER_SECRET",
            help="Twitter consumer secret")),
    )
    for flags, settings in argument_specs:
        arg_parser.add_argument(*flags, **settings)

    utils.set_logging_options(arg_parser)

    return arg_parser.parse_args()
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
34 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
35 |
def get_auth(options, access_token):
    """Build the ``twitter.OAuth`` object used to sign API requests.

    :param options: parsed options; ``consumer_key`` and ``consumer_secret``
        are read from it.
    :param access_token: indexable pair; item 0 is passed as the token and
        item 1 as the token secret.
    :returns: the ``twitter.OAuth`` instance.
    """
    key, secret = options.consumer_key, options.consumer_secret
    return twitter.OAuth(
        token=access_token[0],
        token_secret=access_token[1],
        consumer_key=key,
        consumer_secret=secret)
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
40 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
41 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
42 |
if __name__ == "__main__":
    # Fetch a user's timeline page by page through the Twitter REST API and
    # hand every tweet to processor.TwitterProcessorStatus, which works on the
    # database session opened below.

    options = get_options()

    # Validate the page size up front so a bad value fails with a clear
    # message before any network or database work is attempted.
    rpp = int(options.rpp)
    if rpp <= 0:
        raise SystemExit("Result per page (-P) must be a positive integer")
    # 3200 is the total tweet budget used to cap the number of requests
    # (presumably the API's own user-timeline limit -- see Twitter docs).
    max_pages = 3200 // rpp

    access_token = utils.get_oauth_token(
        consumer_key=options.consumer_key,
        consumer_secret=options.consumer_secret,
        token_file_path=options.token_filename)
    auth = get_auth(options, access_token)

    t = twitter.Twitter(domain="api.twitter.com", api_version="1.1", secure=True, auth=auth)

    # A value without a "scheme://" prefix is taken to be a file path and is
    # turned into an SQLite connection string.
    conn_str = options.conn_str.strip()
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    engine, metadata, Session = models.setup_database(
        conn_str,
        echo=((options.verbose - options.quiet) > 0),
        create_all=True)
    session = None
    try:
        session = Session()

        results = None
        page = 1
        print("screen_name: %r - userid: %r" % (options.screen_name, options.user_id))

        rargs = {'count': options.rpp}

        if options.screen_name:
            rargs['screen_name'] = options.screen_name
        if options.user_id:
            rargs['user_id'] = options.user_id

        # -1 means "no page fetched yet"; 0 means a tweet without a usable id
        # was seen, which stops the pagination.
        max_id = -1
        while (page <= max_pages
               and (results is None or len(results) > 0)
               and (max_id > 0 or max_id == -1)):

            if max_id > 0:
                rargs['max_id'] = max_id
            results = t.statuses.user_timeline(**rargs)

            page_ids = []
            for tweet in results:
                print(tweet)
                page_ids.append(tweet.get('id', 0))
                tweet_str = anyjson.serialize(tweet)
                p = processor.TwitterProcessorStatus(
                    json_dict=tweet,
                    json_txt=tweet_str,
                    source_id=None,
                    session=session,
                    consumer_token=(options.consumer_key, options.consumer_secret),
                    access_token=access_token,
                    token_filename=options.token_filename,
                    user_query_twitter=False,
                    logger=None)
                p.process()

            if page_ids:
                # The API's max_id bound is inclusive: ask for tweets strictly
                # older than the oldest one on this page, otherwise that tweet
                # is fetched and processed again on the next request.
                lowest_id = min(page_ids)
                max_id = lowest_id - 1 if lowest_id > 0 else 0

            # Persist the page before requesting the next one.
            session.flush()
            session.commit()
            page += 1

    finally:
        if session:
            session.close()
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
97 |
|
|
d92b7a46e5c1
enmi 14
Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
parents:
diff
changeset
|
98 |