|
1 import argparse |
|
2 import re |
|
3 |
|
4 import anyjson |
|
5 import twitter |
|
6 |
|
7 from iri_tweet import models, processor, utils |
|
8 |
|
9 |
|
def get_options():
    """Parse the command line of the timeline export script.

    One positional argument (``conn_str``) and six optional flags are
    declared here; ``utils.set_logging_options`` then registers its own
    logging-related options on the same parser before parsing.

    Returns:
        The namespace produced by ``ArgumentParser.parse_args()``, exposing
        ``conn_str``, ``screen_name``, ``user_id``, ``rpp``,
        ``token_filename``, ``consumer_key`` and ``consumer_secret`` (plus
        whatever ``utils.set_logging_options`` adds).
    """
    parser = argparse.ArgumentParser(
        usage="usage: %(prog)s [options] <connection_str_or_filepath>")

    # (flags, keyword settings) pairs, registered in declaration order so the
    # generated help text keeps the same layout.  An empty flags tuple
    # declares the positional argument.
    argument_table = (
        ((), dict(dest="conn_str", metavar="CONNECTION_STR",
                  help="write tweet to DATABASE. This is a connection string")),
        (("-S",), dict(dest="screen_name", metavar="SCREENNAME", default=None,
                       help="Screen name")),
        (("-U",), dict(dest="user_id", metavar="USERID", default=None,
                       help="User id")),
        # NOTE: rpp stays a string here; callers convert it with int().
        (("-P",), dict(dest="rpp", metavar="RPP", default="200",
                       help="Result per page")),
        (("-t",), dict(dest="token_filename", metavar="TOKEN_FILENAME",
                       default=".oauth_token", help="Token file name")),
        (("-k", "--key"), dict(dest="consumer_key", metavar="CONSUMER_KEY",
                               help="Twitter consumer key")),
        (("-s", "--secret"), dict(dest="consumer_secret",
                                  metavar="CONSUMER_SECRET",
                                  help="Twitter consumer secret")),
    )
    for flags, settings in argument_table:
        parser.add_argument(*flags, **settings)

    # Logging options are owned by the shared utils module.
    utils.set_logging_options(parser)

    return parser.parse_args()
|
34 |
|
def get_auth(options, access_token):
    """Build the ``twitter.OAuth`` object used to sign API requests.

    Args:
        options: parsed command-line namespace; its ``consumer_key`` and
            ``consumer_secret`` attributes are read.
        access_token: indexable pair; element 0 is passed as the OAuth token
            and element 1 as the OAuth token secret.

    Returns:
        The ``twitter.OAuth`` instance built from those four values.
    """
    credentials = {
        "consumer_key": options.consumer_key,
        "consumer_secret": options.consumer_secret,
        "token": access_token[0],
        "token_secret": access_token[1],
    }
    return twitter.OAuth(**credentials)
|
40 |
|
41 |
|
if __name__ == "__main__":
    # Script entry point: download the timeline of one Twitter user (selected
    # by screen name and/or user id) page by page and hand every tweet to
    # processor.TwitterProcessorStatus, committing once per page.

    options = get_options()

    # Convert the page size once and reject values that would otherwise end in
    # a ZeroDivisionError (0) or a silent no-op (negative numbers).
    per_page = int(options.rpp)
    if per_page < 1:
        raise SystemExit("Result per page (-P) must be a positive integer, got %r" % (per_page,))

    access_token = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
    auth = get_auth(options, access_token)

    t = twitter.Twitter(domain="api.twitter.com", api_version="1.1", secure=True, auth=auth)

    conn_str = options.conn_str.strip()
    # Anything without a "scheme://" prefix is treated as a path to an SQLite
    # database file.
    if not re.match(r"^\w+://.+", conn_str):
        conn_str = 'sqlite:///' + conn_str

    # SQL echo is switched on when verbosity outweighs quietness.
    # NOTE(review): options.verbose / options.quiet are presumably registered
    # by utils.set_logging_options -- confirm.
    engine, metadata, Session = models.setup_database(conn_str, echo=((options.verbose - options.quiet) > 0), create_all=True)
    session = None
    try:
        session = Session()

        results = None
        page = 1
        print("screen_name: %r - userid: %r" % (options.screen_name, options.user_id))

        rargs = {'count': per_page}

        if options.screen_name:
            rargs['screen_name'] = options.screen_name
        if options.user_id:
            rargs['user_id'] = options.user_id

        # Never request more than 3200 tweets in total.
        # NOTE(review): 3200 is presumably the depth limit the user_timeline
        # endpoint enforces -- confirm against the API reference.
        max_pages = 3200 // per_page

        # -1 means "no page fetched yet"; afterwards this holds the id of the
        # last tweet of the previous page.  A tweet without an id yields 0,
        # which stops the loop.
        max_id = -1
        while page <= max_pages and (results is None or len(results) > 0) and (max_id > 0 or max_id == -1):

            if max_id > 0:
                # The API treats max_id as inclusive: sending the boundary id
                # unchanged returns that tweet again on every page (so it is
                # processed repeatedly and a page is never empty).  Ask for
                # strictly older tweets instead.
                rargs['max_id'] = max_id - 1
            results = t.statuses.user_timeline(**rargs)

            for tweet in results:
                print(tweet)
                max_id = tweet.get('id', 0)
                tweet_str = anyjson.serialize(tweet)
                p = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=(options.consumer_key, options.consumer_secret), access_token=access_token, token_filename=options.token_filename, user_query_twitter=False, logger=None)
                p.process()

            # Persist one page at a time so an interruption keeps earlier pages.
            session.flush()
            session.commit()
            page += 1

    finally:
        # Session() itself may have raised, leaving session as None.
        if session is not None:
            session.close()
|
97 |
|
98 |