| changeset 207 | 621fa6caec0c |
| parent 206 | 6d642d650470 |
| child 242 | cdd7d3c0549c |
| 206:6d642d650470 | 207:621fa6caec0c |
|---|---|
8 import os |
8 import os |
9 import socket |
9 import socket |
10 import sys |
10 import sys |
11 import time |
11 import time |
12 import tweetstream |
12 import tweetstream |
13 import tweetstream.auth |
13 import tweepy.auth |
14 socket._fileobject.default_bufsize = 0 |
14 socket._fileobject.default_bufsize = 0 |
15 |
15 |
16 |
16 |
17 |
17 |
18 #columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user'] |
18 #columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user'] |
20 #columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following'] |
20 #columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following'] |
21 columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count'] |
21 columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count'] |
22 #just put it in a sqlite3 table |
22 #just put it in a sqlite3 table |
23 |
23 |
24 |
24 |
25 class ReconnectingTweetStream(tweetstream.TrackStream): |
25 class ReconnectingTweetStream(tweetstream.FilterStream): |
26 """TweetStream class that automatically tries to reconnect if the |
26 """TweetStream class that automatically tries to reconnect if the |
27 connection goes down. Reconnecting, and waiting for reconnecting, is |
27 connection goes down. Reconnecting, and waiting for reconnecting, is |
28 blocking. |
28 blocking. |
29 |
29 |
30 :param username: See :TweetStream: |
30 :param username: See :TweetStream: |
42 |
42 |
43 :retry_wait: Time to wait before reconnecting in seconds. Default is 5 |
43 :retry_wait: Time to wait before reconnecting in seconds. Default is 5 |
44 |
44 |
45 """ |
45 """ |
46 |
46 |
47 def __init__(self, auth, keywords, url="track", reconnects=3, error_cb=None, retry_wait=5, **kwargs): |
47 def __init__(self, auth, keywords, reconnects=3, error_cb=None, retry_wait=5, **kwargs): |
48 self.max_reconnects = reconnects |
48 self.max_reconnects = reconnects |
49 self.retry_wait = retry_wait |
49 self.retry_wait = retry_wait |
50 self._reconnects = 0 |
50 self._reconnects = 0 |
51 self._error_cb = error_cb |
51 self._error_cb = error_cb |
52 super(ReconnectingTweetStream, self).__init__(auth, keywords, url, **kwargs) |
52 super(ReconnectingTweetStream, self).__init__(auth=auth, track=keywords, **kwargs) |
53 |
53 |
54 def next(self): |
54 def next(self): |
55 while True: |
55 while True: |
56 try: |
56 try: |
57 return super(ReconnectingTweetStream, self).next() |
57 return super(ReconnectingTweetStream, self).next() |
88 |
88 |
89 track_list = track or raw_input('Keywords to track (comma seperated): ').strip() |
89 track_list = track or raw_input('Keywords to track (comma seperated): ').strip() |
90 track_list = [k for k in track_list.split(',')] |
90 track_list = [k for k in track_list.split(',')] |
91 |
91 |
92 if username and password: |
92 if username and password: |
93 auth = tweetstream.auth.BasicAuthHandler(username, password) |
93 auth = tweepy.auth.BasicAuthHandler(username, password) |
94 else: |
94 else: |
95 consumer_key = models.CONSUMER_KEY |
95 consumer_key = models.CONSUMER_KEY |
96 consumer_secret = models.CONSUMER_SECRET |
96 consumer_secret = models.CONSUMER_SECRET |
97 auth = tweetstream.auth.OAuthHandler(consumer_key, consumer_secret, secure=False) |
97 auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret, secure=False) |
98 auth.set_access_token(*(utils.get_oauth_token(token_filename))) |
98 auth.set_access_token(*(utils.get_oauth_token(token_filename))) |
99 |
99 |
100 if duration >= 0: |
100 if duration >= 0: |
101 end_ts = datetime.datetime.utcnow() + datetime.timedelta(seconds=duration) |
101 end_ts = datetime.datetime.utcnow() + datetime.timedelta(seconds=duration) |
102 |
102 |
105 for tweet in stream: |
105 for tweet in stream: |
106 if duration >= 0 and datetime.datetime.utcnow() >= end_ts: |
106 if duration >= 0 and datetime.datetime.utcnow() >= end_ts: |
107 print "Stop recording after %d seconds." % (duration) |
107 print "Stop recording after %d seconds." % (duration) |
108 break |
108 break |
109 process_tweet(tweet, session, debug, token_filename) |
109 process_tweet(tweet, session, debug, token_filename) |
110 logging.info("Tweet count: %d - current rate : %d - running : %s"%(stream.count, stream.rate, int(time.time()-stream.starttime))) |
110 logging.info("Tweet count: %d - current rate : %.2f - running : %s"%(stream.count, stream.rate, int(time.time()-stream.starttime))) |
111 session.commit() |
111 session.commit() |
112 finally: |
112 finally: |
113 stream.close() |
113 stream.close() |
114 |
114 |
115 def get_options(): |
115 def get_options(): |