|
1 import twython |
|
2 from sqlite3 import * |
|
3 import datetime, time |
|
4 import email.utils |
|
5 from optparse import OptionParser |
|
6 import os.path |
|
7 import os |
|
8 import sys |
|
9 import simplejson |
|
10 |
|
11 |
|
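# Command-line options (see the OptionParser setup below): filename, rpp, page, total_page, screen_name, username, password, new, debug. (start_date / end_date are not implemented.)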
|
13 |
|
14 |
|
15 |
|
def adapt_datetime(ts):
    """Turn a ``datetime`` into a POSIX timestamp suitable for SQLite.

    The fields of *ts* are interpreted in the host's local timezone by
    ``time.mktime``. The result is a float; microseconds are not carried
    over because ``timetuple()`` has no sub-second field.
    """
    local_fields = ts.timetuple()
    return time.mktime(local_fields)
|
18 |
|
def adapt_geo(geo):
    """Serialize a tweet's ``geo`` value to a JSON string for storage."""
    encoded = simplejson.dumps(geo)
    return encoded
|
21 |
|
def convert_geo(s):
    """Decode a JSON string (as produced by ``adapt_geo``) back into an object."""
    decoded = simplejson.loads(s)
    return decoded
|
24 |
|
25 |
|
# Module-level sqlite3 hooks (the two registrations are independent).
#
# Values read from columns declared with type "geo" are decoded from JSON.
# NOTE(review): sqlite3 only applies converters on connections opened with
# ``detect_types``; confirm whether any connection in this project does so.
register_converter("geo", convert_geo)

# datetime parameters are bound as POSIX timestamps.
register_adapter(datetime.datetime, adapt_datetime)
|
28 |
|
# Column names of the tweet_tweet table (a created_at_ts column is appended
# when the table is created).
columns_tweet = [
    u'favorited',
    u'truncated',
    u'text',
    u'created_at',
    u'source',
    u'in_reply_to_status_id',
    u'in_reply_to_screen_name',
    u'in_reply_to_user_id',
    u'geo',
    u'id',
    u'user',
]

# Column names of the tweet_user table (a created_at_ts column is appended
# when the table is created).
columns_user = [
    u'id',
    u'verified',
    u'profile_sidebar_fill_color',
    u'profile_text_color',
    u'followers_count',
    u'protected',
    u'location',
    u'profile_background_color',
    u'utc_offset',
    u'statuses_count',
    u'description',
    u'friends_count',
    u'profile_link_color',
    u'profile_image_url',
    u'notifications',
    u'geo_enabled',
    u'profile_background_image_url',
    u'screen_name',
    u'profile_background_tile',
    u'favourites_count',
    u'name',
    u'url',
    u'created_at',
    u'time_zone',
    u'profile_sidebar_border_color',
    u'following',
]
|
31 |
|
def processDate(entry):
    """Parse ``entry["created_at"]`` and store it as ``entry["created_at_ts"]``.

    *entry* is modified in place; nothing is returned. The stored value is a
    naive ``datetime`` in the host's local timezone that denotes the same
    instant as the source string.

    Raises ``KeyError`` if *entry* has no ``"created_at"`` key and
    ``TypeError`` if the date string cannot be parsed.
    """
    parsed = email.utils.parsedate_tz(entry["created_at"])
    # mktime_tz() honours the UTC offset embedded in the string (e.g. "+0000").
    # parsedate() followed by time.mktime() would drop that offset and read
    # the fields as host-local time, skewing the result on any non-UTC host.
    # Strings without an offset are still treated as local time.
    epoch = email.utils.mktime_tz(parsed)
    entry["created_at_ts"] = datetime.datetime.fromtimestamp(epoch)
|
35 |
|
def _insert_row(cursor, table, row):
    """Insert the mapping *row* into *table* and return the new row id.

    Column names are written as double-quoted identifiers and the values are
    bound through positional ``?`` placeholders, so a key taken from the API
    payload cannot change the shape of the statement.
    """
    names = list(row)
    quoted = ",".join('"' + name.replace('"', '""') + '"' for name in names)
    marks = ",".join("?" for _ in names)
    cursor.execute(
        "insert into " + table + " (" + quoted + ") values (" + marks + ");",
        [row[name] for name in names])
    return cursor.lastrowid


def processPage(page, cursor, debug):
    """Store every not-yet-seen tweet of *page* using *cursor*.

    page   -- iterable of tweet dicts; each needs "id", "created_at" and a
              "user" dict that itself has "created_at".
    cursor -- sqlite3 cursor on a database holding the tweet_user and
              tweet_tweet tables.
    debug  -- when true, print the repr of every entry before handling it.

    A tweet whose id already exists in tweet_tweet is skipped. Otherwise its
    user is inserted into tweet_user, and the tweet is inserted into
    tweet_tweet with "user" replaced by the rowid of that user row. The entry
    dicts are modified in place (created_at_ts added, "user" and "geo"
    rewritten). Nothing is committed here.
    """
    for entry in page:
        if debug:
            print("ENTRY : " + repr(entry))

        # Use the cursor that was passed in, not a module-level global.
        cursor.execute("select id from tweet_tweet where id = ?", (entry["id"],))
        if cursor.fetchone() is not None:
            continue

        entry_user = entry["user"]
        processDate(entry_user)
        user_rowid = _insert_row(cursor, "tweet_user", entry_user)

        processDate(entry)
        entry["user"] = user_rowid
        # "geo" may be absent or empty; only serialize a real value.
        if entry.get("geo"):
            entry["geo"] = adapt_geo(entry["geo"])
        _insert_row(cursor, "tweet_tweet", entry)
|
54 |
|
55 |
|
if __name__ == "__main__":
    # Command-line entry point: download the timeline of one Twitter screen
    # name page by page and store the tweets in a SQLite database.

    parser = OptionParser()
    parser.add_option("-f", "--file", dest="filename",
                      help="write tweet to FILE", metavar="FILE", default="enmi2010_twitter_rest.db")
    parser.add_option("-r", "--rpp", dest="rpp",
                      help="Results per page", metavar="RESULT_PER_PAGE", default=200, type='int')
    parser.add_option("-p", "--page", dest="page",
                      help="page result", metavar="PAGE", default=1, type='int')
    parser.add_option("-t", "--total-page", dest="total_page",
                      help="Total page number", metavar="TOTAL_PAGE", default=16, type='int')
    parser.add_option("-s", "--screenname", dest="screen_name",
                      help="Twitter screen name", metavar="SCREEN_NAME")
    parser.add_option("-u", "--user", dest="username",
                      help="Twitter user", metavar="USER", default=None)
    parser.add_option("-w", "--password", dest="password",
                      help="Twitter password", metavar="PASSWORD", default=None)
    parser.add_option("-n", "--new", dest="new", action="store_true",
                      help="new database", default=False)
    parser.add_option("-d", "--debug", dest="debug", action="store_true",
                      help="debug", default=False)

    (options, args) = parser.parse_args()

    if options.debug:
        print("OPTIONS : ")
        print(repr(options))

    if options.screen_name is None:
        print("No Screen name. Exiting")
        # A missing mandatory option is an error: report it via the exit status.
        sys.exit(1)

    # --new starts from an empty database file.
    if options.new and os.path.exists(options.filename):
        os.remove(options.filename)

    # conn and curs deliberately stay module-level names: other code in this
    # module may look them up as globals.
    conn = connect(options.filename)
    try:
        conn.row_factory = Row
        curs = conn.cursor()

        curs.execute("create table if not exists tweet_user ("+ ",".join(columns_user) +", created_at_ts integer);")

        curs.execute("create table if not exists tweet_tweet ("+ ",".join(columns_tweet) +", created_at_ts integer);")
        curs.execute("create index if not exists id_index on tweet_tweet (id asc);")

        # Remember the row count so the number of new tweets can be reported.
        curs.execute("select count(*) from tweet_tweet;")
        old_total = curs.fetchone()[0]

        # NOTE(review): the object returned by setup() is replaced by the very
        # next statement, so username/password appear to have no effect.
        # Confirm which client is intended before removing either line.
        twitter = twython.setup(username=options.username, password=options.password, headers="IRI enmi (python urllib)")
        # NOTE(review): API credentials are hard-coded here; consider loading
        # them from configuration kept out of version control.
        twitter = twython.Twython(twitter_token = "54ThDZhpEjokcMgHJOMnQA", twitter_secret = "wUoL9UL2T87tfc97R0Dff2EaqRzpJ5XGdmaN2XK3udA")

        search_results = None
        page = options.page - 1

        # Stop after total_page pages, on the first empty page, or on an API error.
        while (page < options.total_page and (search_results is None or len(search_results) > 0)):
            page += 1
            try:
                search_results = twitter.getUserTimeline(screen_name=options.screen_name, count=options.rpp, page=page)
            except twython.TwythonError as e:
                print("NAME : %s ERROR : %r" % (options.screen_name, e.msg))
                break
            print("NAME : %s PAGE : %r tweet: %r (total page : %s : rpp : %s)"
                  % (options.screen_name, page, len(search_results), options.total_page, options.rpp))
            processPage(search_results, curs, options.debug)

        conn.commit()

        curs.execute("select count(*) from tweet_tweet;")
        total = curs.fetchone()[0]

        print("Tweet for %s : %s, Tweet total : %r" % (options.screen_name, total - old_total, total))
    finally:
        # Release the database handle even when a request or an insert fails.
        conn.close()
|
133 |
|
134 |