script/lib/tweetstream/tests/test_tweetstream.py
changeset 207 621fa6caec0c
parent 15 5d552b6a0e55
parent 13 79b6e132e3d7
child 527 80e5b9543cac
equal deleted inserted replaced
206:6d642d650470 207:621fa6caec0c
     1 import contextlib
     1 import contextlib
     2 import threading
     2 import threading
     3 import time
     3 import time
     4 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
     4 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
     5 
     5 
     6 from nose.tools import assert_raises
     6 from tweetstream import TweetStream, FollowStream, TrackStream, LocationStream
     7 from tweetstream import TweetStream, FollowStream, TrackStream
     7 from tweetstream import ConnectionError, AuthenticationError, SampleStream
     8 from tweetstream import ConnectionError, AuthenticationError
     8 from tweepy.auth import BasicAuthHandler   
     9 from tweetstream import auth
     9 
       
    10 import pytest
       
    11 from pytest import raises
       
    12 slow = pytest.mark.slow
    10 
    13 
    11 from servercontext import test_server
    14 from servercontext import test_server
    12 
    15 
    13 single_tweet = r"""{"in_reply_to_status_id":null,"in_reply_to_user_id":null,"favorited":false,"created_at":"Tue Jun 16 10:40:14 +0000 2009","in_reply_to_screen_name":null,"text":"record industry just keeps on amazing me: http:\/\/is.gd\/13lFo - $150k per song you've SHARED, not that somebody has actually DOWNLOADED.","user":{"notifications":null,"profile_background_tile":false,"followers_count":206,"time_zone":"Copenhagen","utc_offset":3600,"friends_count":191,"profile_background_color":"ffffff","profile_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_images\/250715794\/profile_normal.png","description":"Digital product developer, currently at Opera Software. My tweets are my opinions, not those of my employer.","verified_profile":false,"protected":false,"favourites_count":0,"profile_text_color":"3C3940","screen_name":"eiriksnilsen","name":"Eirik Stridsklev N.","following":null,"created_at":"Tue May 06 12:24:12 +0000 2008","profile_background_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_background_images\/10531192\/160x600opera15.gif","profile_link_color":"0099B9","profile_sidebar_fill_color":"95E8EC","url":"http:\/\/www.stridsklev-nilsen.no\/eirik","id":14672543,"statuses_count":506,"profile_sidebar_border_color":"5ED4DC","location":"Oslo, Norway"},"id":2190767504,"truncated":false,"source":"<a href=\"http:\/\/widgets.opera.com\/widget\/7206\">Twitter Opera widget<\/a>"}"""
    16 single_tweet = r"""{"in_reply_to_status_id":null,"in_reply_to_user_id":null,"favorited":false,"created_at":"Tue Jun 16 10:40:14 +0000 2009","in_reply_to_screen_name":null,"text":"record industry just keeps on amazing me: http:\/\/is.gd\/13lFo - $150k per song you've SHARED, not that somebody has actually DOWNLOADED.","user":{"notifications":null,"profile_background_tile":false,"followers_count":206,"time_zone":"Copenhagen","utc_offset":3600,"friends_count":191,"profile_background_color":"ffffff","profile_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_images\/250715794\/profile_normal.png","description":"Digital product developer, currently at Opera Software. My tweets are my opinions, not those of my employer.","verified_profile":false,"protected":false,"favourites_count":0,"profile_text_color":"3C3940","screen_name":"eiriksnilsen","name":"Eirik Stridsklev N.","following":null,"created_at":"Tue May 06 12:24:12 +0000 2008","profile_background_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_background_images\/10531192\/160x600opera15.gif","profile_link_color":"0099B9","profile_sidebar_fill_color":"95E8EC","url":"http:\/\/www.stridsklev-nilsen.no\/eirik","id":14672543,"statuses_count":506,"profile_sidebar_border_color":"5ED4DC","location":"Oslo, Norway"},"id":2190767504,"truncated":false,"source":"<a href=\"http:\/\/widgets.opera.com\/widget\/7206\">Twitter Opera widget<\/a>"}"""
    14 
    17 
    15 
    18 
    16 def test_bad_auth():
    19 def parameterized(funcarglist):
       
    20     def wrapper(function):
       
    21         function.funcarglist = funcarglist
       
    22         return function
       
    23     return wrapper
       
    24 
       
    25 def pytest_generate_tests(metafunc):
       
    26     for funcargs in getattr(metafunc.function, 'funcarglist', ()):
       
    27         metafunc.addcall(funcargs=funcargs)
       
    28 
       
    29 
       
    30 streamtypes = [
       
    31     dict(cls=TweetStream, args=[], kwargs=dict()),
       
    32     dict(cls=SampleStream, args=[], kwargs=dict()),
       
    33     dict(cls=FollowStream, args=[[1, 2, 3]], kwargs=dict()),
       
    34     dict(cls=TrackStream, args=["opera"], kwargs=dict()),
       
    35     dict(cls=LocationStream, args=["123,4321"], kwargs=dict())
       
    36 ]
       
    37 
       
    38 
       
    39 @parameterized(streamtypes)
       
    40 def test_bad_auth(cls, args, kwargs):
    17     """Test that the proper exception is raised when the user could not be
    41     """Test that the proper exception is raised when the user could not be
    18     authenticated"""
    42     authenticated"""
    19     def auth_denied(request):
    43     def auth_denied(request):
    20         request.send_error(401)
    44         request.send_error(401)
    21 
    45 
    22     with test_server(handler=auth_denied, methods=("post", "get"),
    46     with test_server(handler=auth_denied, methods=("post", "get"), port="random") as server:
    23                      port="random") as server:
    47         auth = BasicAuthHandler("user", "passwd")
    24         stream = TweetStream(auth.BasicAuthHandler("foo", "bar"), url=server.baseurl)
    48         stream = cls(auth, *args, url=server.baseurl)
    25         assert_raises(AuthenticationError, stream.next)
       
    26 
       
    27         stream = FollowStream(auth.BasicAuthHandler("foo", "bar"), [1, 2, 3], url=server.baseurl)
       
    28         assert_raises(AuthenticationError, stream.next)
       
    29 
       
    30         stream = TrackStream(auth.BasicAuthHandler("foo", "bar"), ["opera"], url=server.baseurl)
       
    31         assert_raises(AuthenticationError, stream.next)
       
    32 
    49 
    33 
    50 
    34 def test_404_url():
    51 @parameterized(streamtypes)
       
    52 def test_404_url(cls, args, kwargs):
    35     """Test that the proper exception is raised when the stream URL can't be
    53     """Test that the proper exception is raised when the stream URL can't be
    36     found"""
    54     found"""
    37     def not_found(request):
    55     def not_found(request):
    38         request.send_error(404)
    56         request.send_error(404)
    39 
    57 
    40     with test_server(handler=not_found, methods=("post", "get"),
    58     with test_server(handler=not_found, methods=("post", "get"), port="random") as server:
    41                      port="random") as server:
    59         auth = BasicAuthHandler("user", "passwd")
    42         stream = TweetStream(auth.BasicAuthHandler("foo", "bar"), url=server.baseurl)
    60         stream = cls(auth, *args, url=server.baseurl)
    43         assert_raises(ConnectionError, stream.next)
       
    44 
       
    45         stream = FollowStream(auth.BasicAuthHandler("foo", "bar"), [1, 2, 3], url=server.baseurl)
       
    46         assert_raises(ConnectionError, stream.next)
       
    47 
       
    48         stream = TrackStream(auth.BasicAuthHandler("foo", "bar"), ["opera"], url=server.baseurl)
       
    49         assert_raises(ConnectionError, stream.next)
       
    50 
    61 
    51 
    62 
    52 def test_bad_content():
    63 @parameterized(streamtypes)
       
    64 def test_bad_content(cls, args, kwargs):
    53     """Test error handling if we are given invalid data"""
    65     """Test error handling if we are given invalid data"""
    54     def bad_content(request):
    66     def bad_content(request):
    55         for n in xrange(10):
    67         for n in xrange(10):
    56             # what json we pass doesn't matter. It's not verifying the
    68             # what json we pass doesn't matter. It's not verifying the
    57             # structure, only checking that it's parsable
    69             # structure, only checking that it's parsable
    58             yield "[1,2,3]"
    70             yield "[1,2,3]"
    59         yield "[1,2, I need no stinking close brace"
    71         yield "[1,2, I need no stinking close brace"
    60         yield "[1,2,3]"
    72         yield "[1,2,3]"
    61 
    73 
    62     def do_test(klass, *args):
    74 
    63         with test_server(handler=bad_content, methods=("post", "get"),
    75     with raises(ConnectionError):
    64                          port="random") as server:
    76         with test_server(handler=bad_content, methods=("post", "get"), port="random") as server:
    65             stream = klass(auth.BasicAuthHandler("foo", "bar"), *args, url=server.baseurl)
    77             auth = BasicAuthHandler("user", "passwd")
       
    78             stream = cls(auth, *args, url=server.baseurl)
    66             for tweet in stream:
    79             for tweet in stream:
    67                 pass
    80                 pass
    68 
    81 
    69     assert_raises(ConnectionError, do_test, TweetStream)
       
    70     assert_raises(ConnectionError, do_test, FollowStream, [1, 2, 3])
       
    71     assert_raises(ConnectionError, do_test, TrackStream, ["opera"])
       
    72 
    82 
    73 
    83 @parameterized(streamtypes)
    74 def test_closed_connection():
    84 def test_closed_connection(cls, args, kwargs):
    75     """Test error handling if server unexpectedly closes connection"""
    85     """Test error handling if server unexpectedly closes connection"""
    76     cnt = 1000
    86     cnt = 1000
    77     def bad_content(request):
    87     def bad_content(request):
    78         for n in xrange(cnt):
    88         for n in xrange(cnt):
    79             # what json we pass doesn't matter. It's not verifying the
    89             # what json we pass doesn't matter. It's not verifying the
    80             # structure, only checking that it's parsable
    90             # structure, only checking that it's parsable
    81             yield "[1,2,3]"
    91             yield "[1,2,3]"
    82 
    92 
    83     def do_test(klass, *args):
    93     with raises(ConnectionError):
    84         with test_server(handler=bad_content, methods=("post", "get"),
    94         with test_server(handler=bad_content, methods=("post", "get"), port="random") as server:
    85                          port="random") as server:
    95             auth = BasicAuthHandler("foo", "bar")
    86             stream = klass(auth.BasicAuthHandler("foo", "bar"), *args, url=server.baseurl)
    96             stream = cls(auth, *args, url=server.baseurl)
    87             for tweet in stream:
    97             for tweet in stream:
    88                 pass
    98                 pass
    89 
    99 
    90     assert_raises(ConnectionError, do_test, TweetStream)
   100 
    91     assert_raises(ConnectionError, do_test, FollowStream, [1, 2, 3])
   101 @parameterized(streamtypes)
    92     assert_raises(ConnectionError, do_test, TrackStream, ["opera"])
   102 def test_bad_host(cls, args, kwargs):
       
   103     """Test behaviour if we can't connect to the host"""
       
   104     with raises(ConnectionError):
       
   105         stream = cls("username", "passwd", *args, url="http://wedfwecfghhreewerewads.foo")
       
   106         stream.next()
    93 
   107 
    94 
   108 
    95 def test_bad_host():
   109 @parameterized(streamtypes)
    96     """Test behaviour if we can't connect to the host"""
   110 def smoke_test_receive_tweets(cls, args, kwargs):
    97     stream = TweetStream(auth.BasicAuthHandler("foo", "bar"), url="http://bad.egewdvsdswefdsf.com/")
       
    98     assert_raises(ConnectionError, stream.next)
       
    99 
       
   100     stream = FollowStream(auth.BasicAuthHandler("foo", "bar"), [1, 2, 3], url="http://zegwefdsf.com/")
       
   101     assert_raises(ConnectionError, stream.next)
       
   102 
       
   103     stream = TrackStream(auth.BasicAuthHandler("foo", "bar"), ["foo"], url="http://aswefdsews.com/")
       
   104     assert_raises(ConnectionError, stream.next)
       
   105 
       
   106 
       
   107 def smoke_test_receive_tweets():
       
   108     """Receive 100k tweets and disconnect (slow)"""
   111     """Receive 100k tweets and disconnect (slow)"""
   109     total = 100000
   112     total = 100000
   110 
   113 
   111     def tweetsource(request):
   114     def tweetsource(request):
   112         while True:
   115         while True:
   113             yield single_tweet + "\n"
   116             yield single_tweet + "\n"
   114 
   117 
   115     def do_test(klass, *args):
   118     with test_server(handler=tweetsource, methods=("post", "get"), port="random") as server:
   116         with test_server(handler=tweetsource,
   119         auth = BasicAuthHandler("foo", "bar")
   117                          methods=("post", "get"), port="random") as server:
   120         stream = cls(auth, *args, url=server.baseurl)
   118             stream = klass(auth.BasicAuthHandler("foo", "bar"), *args, url=server.baseurl)
   121         for tweet in stream:
   119             for tweet in stream:
   122             if stream.count == total:
   120                 if stream.count == total:
   123                 break
   121                     break
       
   122 
       
   123     do_test(TweetStream)
       
   124     do_test(FollowStream, [1, 2, 3])
       
   125     do_test(TrackStream, ["foo", "bar"])
       
   126 
   124 
   127 
   125 
   128 def test_keepalive():
   126 @parameterized(streamtypes)
       
   127 def test_keepalive(cls, args, kwargs):
   129     """Make sure we behave sanely when there are keepalive newlines in the
   128     """Make sure we behave sanely when there are keepalive newlines in the
   130     data received from twitter"""
   129     data received from twitter"""
   131     def tweetsource(request):
   130     def tweetsource(request):
   132         yield single_tweet+"\n"
   131         yield single_tweet+"\n"
   133         yield "\n"
   132         yield "\n"
   141         yield "\n"
   140         yield "\n"
   142         yield "\n"
   141         yield "\n"
   143         yield single_tweet+"\n"
   142         yield single_tweet+"\n"
   144         yield "\n"
   143         yield "\n"
   145 
   144 
   146     def do_test(klass, *args):
   145 
   147         with test_server(handler=tweetsource, methods=("post", "get"),
   146     with test_server(handler=tweetsource, methods=("post", "get"), port="random") as server:
   148                          port="random") as server:
   147         auth = BasicAuthHandler("foo", "bar")
   149             stream = klass(auth.BasicAuthHandler("foo", "bar"), *args, url=server.baseurl)
   148         stream = cls(auth, *args, url=server.baseurl)
   150             try:
   149         try:
   151                 for tweet in stream:
   150             for tweet in stream:
   152                     pass
   151                 pass
   153             except ConnectionError:
   152         except ConnectionError:
   154                 assert stream.count == 3, "Got %s, wanted 3" % stream.count
   153             assert stream.count == 3, "Got %s, wanted 3" % stream.count
   155             else:
   154         else:
   156                 assert False, "Didn't handle keepalive"
   155             assert False, "Didn't handle keepalive"
   157 
   156 
   158 
   157 
   159     do_test(TweetStream)
   158 @slow
   160     do_test(FollowStream, [1, 2, 3])
   159 @parameterized(streamtypes)
   161     do_test(TrackStream, ["foo", "bar"])
   160 def test_buffering(cls, args, kwargs):
   162 
       
   163 
       
   164 def test_buffering():
       
   165     """Test if buffering stops data from being returned immediately.
   161     """Test if buffering stops data from being returned immediately.
   166     If there is some buffering in play that might mean data is only returned
   162     If there is some buffering in play that might mean data is only returned
   167     from the generator when the buffer is full. If buffer is bigger than a
   163     from the generator when the buffer is full. If buffer is bigger than a
   168     tweet, this will happen. Default buffer size in the part of socket lib
   164     tweet, this will happen. Default buffer size in the part of socket lib
   169     that enables readline is 8k. Max tweet length is around 3k."""
   165     that enables readline is 8k. Max tweet length is around 3k."""
   174         # need to yield a bunch here so we're sure we'll return from the
   170         # need to yield a bunch here so we're sure we'll return from the
   175         # blocking call in case the buffering bug is present.
   171         # blocking call in case the buffering bug is present.
   176         for n in xrange(100):
   172         for n in xrange(100):
   177             yield single_tweet+"\n"
   173             yield single_tweet+"\n"
   178 
   174 
   179     def do_test(klass, *args):
       
   180         with test_server(handler=tweetsource, methods=("post", "get"),
       
   181                          port="random") as server:
       
   182             stream = klass(auth.BasicAuthHandler("foo", "bar"), *args, url=server.baseurl)
       
   183 
   175 
   184             start = time.time()
   176     with test_server(handler=tweetsource, methods=("post", "get"), port="random") as server:
   185             stream.next()
   177         auth = BasicAuthHandler("foo", "bar")
   186             first = time.time()
   178         stream = cls(auth, *args, url=server.baseurl)
   187             diff = first - start
   179         start = time.time()
   188             assert diff < 1, "Getting first tweet took more than a second!"
   180         stream.next()
       
   181         first = time.time()
       
   182         diff = first - start
       
   183         assert diff < 1, "Getting first tweet took more than a second!"
   189 
   184 
   190     do_test(TweetStream)
       
   191     do_test(FollowStream, [1, 2, 3])
       
   192     do_test(TrackStream, ["foo", "bar"])
       
   193 
       
   194