script/stream/recorder_tweetstream.py
changeset 207 621fa6caec0c
parent 206 6d642d650470
child 242 cdd7d3c0549c
equal deleted inserted replaced
206:6d642d650470 207:621fa6caec0c
     8 import os
     8 import os
     9 import socket
     9 import socket
    10 import sys
    10 import sys
    11 import time
    11 import time
    12 import tweetstream
    12 import tweetstream
    13 import tweetstream.auth
    13 import tweepy.auth
    14 socket._fileobject.default_bufsize = 0
    14 socket._fileobject.default_bufsize = 0
    15 
    15 
    16 
    16 
    17 
    17 
    18 #columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
    18 #columns_tweet = [u'favorited', u'truncated', u'text', u'created_at', u'source', u'in_reply_to_status_id', u'in_reply_to_screen_name', u'in_reply_to_user_id', u'geo', u'id', u'user']
    20 #columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
    20 #columns_user = [u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'geo_enabled', u'profile_background_image_url', u'screen_name', u'profile_background_tile', u'favourites_count', u'name', u'url', u'created_at', u'time_zone', u'profile_sidebar_border_color', u'following']
    21 columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count']
    21 columns_user = [u'follow_request_sent', u'profile_use_background_image', u'id', u'verified', u'profile_sidebar_fill_color', u'profile_text_color', u'followers_count', u'protected', u'location', u'profile_background_color', u'id_str', u'utc_offset', u'statuses_count', u'description', u'friends_count', u'profile_link_color', u'profile_image_url', u'notifications', u'show_all_inline_media', u'geo_enabled', u'profile_background_image_url', u'name', u'lang', u'following', u'profile_background_tile', u'favourites_count', u'screen_name', u'url', u'created_at', u'contributors_enabled', u'time_zone', u'profile_sidebar_border_color', u'is_translator', u'listed_count']
    22 #just put it in a sqlite3 table
    22 #just put it in a sqlite3 table
    23 
    23 
    24 
    24 
    25 class ReconnectingTweetStream(tweetstream.TrackStream):
    25 class ReconnectingTweetStream(tweetstream.FilterStream):
    26     """TweetStream class that automatically tries to reconnect if the
    26     """TweetStream class that automatically tries to reconnect if the
    27     connection goes down. Reconnecting, and waiting for reconnecting, is
    27     connection goes down. Reconnecting, and waiting for reconnecting, is
    28     blocking.
    28     blocking.
    29 
    29 
    30     :param username: See :TweetStream:
    30     :param username: See :TweetStream:
    42 
    42 
    43     :retry_wait: Time to wait before reconnecting in seconds. Default is 5
    43     :retry_wait: Time to wait before reconnecting in seconds. Default is 5
    44 
    44 
    45     """
    45     """
    46 
    46 
    47     def __init__(self, auth, keywords, url="track", reconnects=3, error_cb=None, retry_wait=5, **kwargs):
    47     def __init__(self, auth, keywords, reconnects=3, error_cb=None, retry_wait=5, **kwargs):
    48         self.max_reconnects = reconnects
    48         self.max_reconnects = reconnects
    49         self.retry_wait = retry_wait
    49         self.retry_wait = retry_wait
    50         self._reconnects = 0
    50         self._reconnects = 0
    51         self._error_cb = error_cb
    51         self._error_cb = error_cb
    52         super(ReconnectingTweetStream, self).__init__(auth, keywords, url, **kwargs)
    52         super(ReconnectingTweetStream, self).__init__(auth=auth, track=keywords, **kwargs)
    53 
    53 
    54     def next(self):
    54     def next(self):
    55         while True:
    55         while True:
    56             try:
    56             try:
    57                 return super(ReconnectingTweetStream, self).next()
    57                 return super(ReconnectingTweetStream, self).next()
    88 
    88 
    89     track_list = track or raw_input('Keywords to track (comma seperated): ').strip()
    89     track_list = track or raw_input('Keywords to track (comma seperated): ').strip()
    90     track_list = [k for k in track_list.split(',')]
    90     track_list = [k for k in track_list.split(',')]
    91     
    91     
    92     if username and password:
    92     if username and password:
    93         auth = tweetstream.auth.BasicAuthHandler(username, password)        
    93         auth = tweepy.auth.BasicAuthHandler(username, password)        
    94     else:
    94     else:
    95         consumer_key = models.CONSUMER_KEY
    95         consumer_key = models.CONSUMER_KEY
    96         consumer_secret = models.CONSUMER_SECRET
    96         consumer_secret = models.CONSUMER_SECRET
    97         auth = tweetstream.auth.OAuthHandler(consumer_key, consumer_secret, secure=False)
    97         auth = tweepy.auth.OAuthHandler(consumer_key, consumer_secret, secure=False)
    98         auth.set_access_token(*(utils.get_oauth_token(token_filename)))
    98         auth.set_access_token(*(utils.get_oauth_token(token_filename)))
    99     
    99     
   100     if duration >= 0:
   100     if duration >= 0:
   101         end_ts = datetime.datetime.utcnow() + datetime.timedelta(seconds=duration)
   101         end_ts = datetime.datetime.utcnow() + datetime.timedelta(seconds=duration)
   102     
   102     
   105         for tweet in stream:            
   105         for tweet in stream:            
   106             if duration >= 0 and  datetime.datetime.utcnow() >= end_ts:
   106             if duration >= 0 and  datetime.datetime.utcnow() >= end_ts:
   107                 print "Stop recording after %d seconds." % (duration)
   107                 print "Stop recording after %d seconds." % (duration)
   108                 break
   108                 break
   109             process_tweet(tweet, session, debug, token_filename)
   109             process_tweet(tweet, session, debug, token_filename)
   110             logging.info("Tweet count: %d - current rate : %d - running : %s"%(stream.count, stream.rate, int(time.time()-stream.starttime)))
   110             logging.info("Tweet count: %d - current rate : %.2f - running : %s"%(stream.count, stream.rate, int(time.time()-stream.starttime)))
   111             session.commit()
   111             session.commit()
   112     finally:
   112     finally:
   113         stream.close()
   113         stream.close()
   114         
   114         
   115 def get_options():
   115 def get_options():