script/lib/tweetstream/tests/test_tweetstream.py
changeset 13 79b6e132e3d7
parent 12 4daf47fcf792
child 14 10e7a0c7c64f
child 207 621fa6caec0c
equal deleted inserted replaced
12:4daf47fcf792 13:79b6e132e3d7
     1 import contextlib
     1 import contextlib
     2 import threading
     2 import threading
     3 import time
     3 import time
     4 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
     4 from BaseHTTPServer import BaseHTTPRequestHandler, HTTPServer
     5 
     5 
     6 from nose.tools import assert_raises
     6 from tweetstream import TweetStream, FollowStream, TrackStream, LocationStream
     7 from tweetstream import TweetStream, FollowStream, TrackStream
     7 from tweetstream import ConnectionError, AuthenticationError, SampleStream
     8 from tweetstream import ConnectionError, AuthenticationError
     8 
       
     9 import pytest
       
    10 from pytest import raises
       
    11 slow = pytest.mark.slow
     9 
    12 
    10 from servercontext import test_server
    13 from servercontext import test_server
    11 
    14 
    12 single_tweet = r"""{"in_reply_to_status_id":null,"in_reply_to_user_id":null,"favorited":false,"created_at":"Tue Jun 16 10:40:14 +0000 2009","in_reply_to_screen_name":null,"text":"record industry just keeps on amazing me: http:\/\/is.gd\/13lFo - $150k per song you've SHARED, not that somebody has actually DOWNLOADED.","user":{"notifications":null,"profile_background_tile":false,"followers_count":206,"time_zone":"Copenhagen","utc_offset":3600,"friends_count":191,"profile_background_color":"ffffff","profile_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_images\/250715794\/profile_normal.png","description":"Digital product developer, currently at Opera Software. My tweets are my opinions, not those of my employer.","verified_profile":false,"protected":false,"favourites_count":0,"profile_text_color":"3C3940","screen_name":"eiriksnilsen","name":"Eirik Stridsklev N.","following":null,"created_at":"Tue May 06 12:24:12 +0000 2008","profile_background_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_background_images\/10531192\/160x600opera15.gif","profile_link_color":"0099B9","profile_sidebar_fill_color":"95E8EC","url":"http:\/\/www.stridsklev-nilsen.no\/eirik","id":14672543,"statuses_count":506,"profile_sidebar_border_color":"5ED4DC","location":"Oslo, Norway"},"id":2190767504,"truncated":false,"source":"<a href=\"http:\/\/widgets.opera.com\/widget\/7206\">Twitter Opera widget<\/a>"}"""
    15 single_tweet = r"""{"in_reply_to_status_id":null,"in_reply_to_user_id":null,"favorited":false,"created_at":"Tue Jun 16 10:40:14 +0000 2009","in_reply_to_screen_name":null,"text":"record industry just keeps on amazing me: http:\/\/is.gd\/13lFo - $150k per song you've SHARED, not that somebody has actually DOWNLOADED.","user":{"notifications":null,"profile_background_tile":false,"followers_count":206,"time_zone":"Copenhagen","utc_offset":3600,"friends_count":191,"profile_background_color":"ffffff","profile_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_images\/250715794\/profile_normal.png","description":"Digital product developer, currently at Opera Software. My tweets are my opinions, not those of my employer.","verified_profile":false,"protected":false,"favourites_count":0,"profile_text_color":"3C3940","screen_name":"eiriksnilsen","name":"Eirik Stridsklev N.","following":null,"created_at":"Tue May 06 12:24:12 +0000 2008","profile_background_image_url":"http:\/\/s3.amazonaws.com\/twitter_production\/profile_background_images\/10531192\/160x600opera15.gif","profile_link_color":"0099B9","profile_sidebar_fill_color":"95E8EC","url":"http:\/\/www.stridsklev-nilsen.no\/eirik","id":14672543,"statuses_count":506,"profile_sidebar_border_color":"5ED4DC","location":"Oslo, Norway"},"id":2190767504,"truncated":false,"source":"<a href=\"http:\/\/widgets.opera.com\/widget\/7206\">Twitter Opera widget<\/a>"}"""
    13 
    16 
    14 
    17 
    15 def test_bad_auth():
    18 def parameterized(funcarglist):
       
    19     def wrapper(function):
       
    20         function.funcarglist = funcarglist
       
    21         return function
       
    22     return wrapper
       
    23 
       
    24 def pytest_generate_tests(metafunc):
       
    25     for funcargs in getattr(metafunc.function, 'funcarglist', ()):
       
    26         metafunc.addcall(funcargs=funcargs)
       
    27 
       
    28 
       
    29 streamtypes = [
       
    30     dict(cls=TweetStream, args=[], kwargs=dict()),
       
    31     dict(cls=SampleStream, args=[], kwargs=dict()),
       
    32     dict(cls=FollowStream, args=[[1, 2, 3]], kwargs=dict()),
       
    33     dict(cls=TrackStream, args=["opera"], kwargs=dict()),
       
    34     dict(cls=LocationStream, args=["123,4321"], kwargs=dict())
       
    35 ]
       
    36 
       
    37 
       
    38 @parameterized(streamtypes)
       
    39 def test_bad_auth(cls, args, kwargs):
    16     """Test that the proper exception is raised when the user could not be
    40     """Test that the proper exception is raised when the user could not be
    17     authenticated"""
    41     authenticated"""
    18     def auth_denied(request):
    42     def auth_denied(request):
    19         request.send_error(401)
    43         request.send_error(401)
    20 
    44 
    21     with test_server(handler=auth_denied, methods=("post", "get"),
    45     with test_server(handler=auth_denied, methods=("post", "get"), port="random") as server:
    22                      port="random") as server:
    46         stream = cls("user", "passwd", *args, url=server.baseurl)
    23         stream = TweetStream("foo", "bar", url=server.baseurl)
       
    24         assert_raises(AuthenticationError, stream.next)
       
    25 
       
    26         stream = FollowStream("foo", "bar", [1, 2, 3], url=server.baseurl)
       
    27         assert_raises(AuthenticationError, stream.next)
       
    28 
       
    29         stream = TrackStream("foo", "bar", ["opera"], url=server.baseurl)
       
    30         assert_raises(AuthenticationError, stream.next)
       
    31 
    47 
    32 
    48 
    33 def test_404_url():
    49 @parameterized(streamtypes)
       
    50 def test_404_url(cls, args, kwargs):
    34     """Test that the proper exception is raised when the stream URL can't be
    51     """Test that the proper exception is raised when the stream URL can't be
    35     found"""
    52     found"""
    36     def not_found(request):
    53     def not_found(request):
    37         request.send_error(404)
    54         request.send_error(404)
    38 
    55 
    39     with test_server(handler=not_found, methods=("post", "get"),
    56     with test_server(handler=not_found, methods=("post", "get"), port="random") as server:
    40                      port="random") as server:
    57         stream = cls("user", "passwd", *args, url=server.baseurl)
    41         stream = TweetStream("foo", "bar", url=server.baseurl)
       
    42         assert_raises(ConnectionError, stream.next)
       
    43 
       
    44         stream = FollowStream("foo", "bar", [1, 2, 3], url=server.baseurl)
       
    45         assert_raises(ConnectionError, stream.next)
       
    46 
       
    47         stream = TrackStream("foo", "bar", ["opera"], url=server.baseurl)
       
    48         assert_raises(ConnectionError, stream.next)
       
    49 
    58 
    50 
    59 
    51 def test_bad_content():
    60 @parameterized(streamtypes)
       
    61 def test_bad_content(cls, args, kwargs):
    52     """Test error handling if we are given invalid data"""
    62     """Test error handling if we are given invalid data"""
    53     def bad_content(request):
    63     def bad_content(request):
    54         for n in xrange(10):
    64         for n in xrange(10):
    55             # what json we pass doesn't matter. It's not verifying the
    65             # what json we pass doesn't matter. It's not verifying the
    56             # structure, only checking that it's parsable
    66             # structure, only checking that it's parsable
    57             yield "[1,2,3]"
    67             yield "[1,2,3]"
    58         yield "[1,2, I need no stinking close brace"
    68         yield "[1,2, I need no stinking close brace"
    59         yield "[1,2,3]"
    69         yield "[1,2,3]"
    60 
    70 
    61     def do_test(klass, *args):
    71 
    62         with test_server(handler=bad_content, methods=("post", "get"),
    72     with raises(ConnectionError):
    63                          port="random") as server:
    73         with test_server(handler=bad_content, methods=("post", "get"), port="random") as server:
    64             stream = klass("foo", "bar", *args, url=server.baseurl)
    74             stream = cls("user", "passwd", *args, url=server.baseurl)
    65             for tweet in stream:
    75             for tweet in stream:
    66                 pass
    76                 pass
    67 
    77 
    68     assert_raises(ConnectionError, do_test, TweetStream)
       
    69     assert_raises(ConnectionError, do_test, FollowStream, [1, 2, 3])
       
    70     assert_raises(ConnectionError, do_test, TrackStream, ["opera"])
       
    71 
    78 
    72 
    79 @parameterized(streamtypes)
    73 def test_closed_connection():
    80 def test_closed_connection(cls, args, kwargs):
    74     """Test error handling if server unexpectedly closes connection"""
    81     """Test error handling if server unexpectedly closes connection"""
    75     cnt = 1000
    82     cnt = 1000
    76     def bad_content(request):
    83     def bad_content(request):
    77         for n in xrange(cnt):
    84         for n in xrange(cnt):
    78             # what json we pass doesn't matter. It's not verifying the
    85             # what json we pass doesn't matter. It's not verifying the
    79             # structure, only checking that it's parsable
    86             # structure, only checking that it's parsable
    80             yield "[1,2,3]"
    87             yield "[1,2,3]"
    81 
    88 
    82     def do_test(klass, *args):
    89     with raises(ConnectionError):
    83         with test_server(handler=bad_content, methods=("post", "get"),
    90         with test_server(handler=bad_content, methods=("post", "get"), port="random") as server:
    84                          port="random") as server:
    91             stream = cls("foo", "bar", *args, url=server.baseurl)
    85             stream = klass("foo", "bar", *args, url=server.baseurl)
       
    86             for tweet in stream:
    92             for tweet in stream:
    87                 pass
    93                 pass
    88 
    94 
    89     assert_raises(ConnectionError, do_test, TweetStream)
    95 
    90     assert_raises(ConnectionError, do_test, FollowStream, [1, 2, 3])
    96 @parameterized(streamtypes)
    91     assert_raises(ConnectionError, do_test, TrackStream, ["opera"])
    97 def test_bad_host(cls, args, kwargs):
       
    98     """Test behaviour if we can't connect to the host"""
       
    99     with raises(ConnectionError):
       
   100         stream = cls("username", "passwd", *args, url="http://wedfwecfghhreewerewads.foo")
       
   101         stream.next()
    92 
   102 
    93 
   103 
    94 def test_bad_host():
   104 @parameterized(streamtypes)
    95     """Test behaviour if we can't connect to the host"""
   105 def smoke_test_receive_tweets(cls, args, kwargs):
    96     stream = TweetStream("foo", "bar", url="http://bad.egewdvsdswefdsf.com/")
       
    97     assert_raises(ConnectionError, stream.next)
       
    98 
       
    99     stream = FollowStream("foo", "bar", [1, 2, 3], url="http://zegwefdsf.com/")
       
   100     assert_raises(ConnectionError, stream.next)
       
   101 
       
   102     stream = TrackStream("foo", "bar", ["foo"], url="http://aswefdsews.com/")
       
   103     assert_raises(ConnectionError, stream.next)
       
   104 
       
   105 
       
   106 def smoke_test_receive_tweets():
       
   107     """Receive 100k tweets and disconnect (slow)"""
   106     """Receive 100k tweets and disconnect (slow)"""
   108     total = 100000
   107     total = 100000
   109 
   108 
   110     def tweetsource(request):
   109     def tweetsource(request):
   111         while True:
   110         while True:
   112             yield single_tweet + "\n"
   111             yield single_tweet + "\n"
   113 
   112 
   114     def do_test(klass, *args):
   113     with test_server(handler=tweetsource, methods=("post", "get"), port="random") as server:
   115         with test_server(handler=tweetsource,
   114         stream = cls("foo", "bar", *args, url=server.baseurl)
   116                          methods=("post", "get"), port="random") as server:
   115         for tweet in stream:
   117             stream = klass("foo", "bar", *args, url=server.baseurl)
   116             if stream.count == total:
   118             for tweet in stream:
   117                 break
   119                 if stream.count == total:
       
   120                     break
       
   121 
       
   122     do_test(TweetStream)
       
   123     do_test(FollowStream, [1, 2, 3])
       
   124     do_test(TrackStream, ["foo", "bar"])
       
   125 
   118 
   126 
   119 
   127 def test_keepalive():
   120 @parameterized(streamtypes)
       
   121 def test_keepalive(cls, args, kwargs):
   128     """Make sure we behave sanely when there are keepalive newlines in the
   122     """Make sure we behave sanely when there are keepalive newlines in the
   129     data received from twitter"""
   123     data received from twitter"""
   130     def tweetsource(request):
   124     def tweetsource(request):
   131         yield single_tweet+"\n"
   125         yield single_tweet+"\n"
   132         yield "\n"
   126         yield "\n"
   140         yield "\n"
   134         yield "\n"
   141         yield "\n"
   135         yield "\n"
   142         yield single_tweet+"\n"
   136         yield single_tweet+"\n"
   143         yield "\n"
   137         yield "\n"
   144 
   138 
   145     def do_test(klass, *args):
   139 
   146         with test_server(handler=tweetsource, methods=("post", "get"),
   140     with test_server(handler=tweetsource, methods=("post", "get"), port="random") as server:
   147                          port="random") as server:
   141         stream = cls("foo", "bar", *args, url=server.baseurl)
   148             stream = klass("foo", "bar", *args, url=server.baseurl)
   142         try:
   149             try:
   143             for tweet in stream:
   150                 for tweet in stream:
   144                 pass
   151                     pass
   145         except ConnectionError:
   152             except ConnectionError:
   146             assert stream.count == 3, "Got %s, wanted 3" % stream.count
   153                 assert stream.count == 3, "Got %s, wanted 3" % stream.count
   147         else:
   154             else:
   148             assert False, "Didn't handle keepalive"
   155                 assert False, "Didn't handle keepalive"
       
   156 
   149 
   157 
   150 
   158     do_test(TweetStream)
   151 @slow
   159     do_test(FollowStream, [1, 2, 3])
   152 @parameterized(streamtypes)
   160     do_test(TrackStream, ["foo", "bar"])
   153 def test_buffering(cls, args, kwargs):
   161 
       
   162 
       
   163 def test_buffering():
       
   164     """Test if buffering stops data from being returned immediately.
   154     """Test if buffering stops data from being returned immediately.
   165     If there is some buffering in play that might mean data is only returned
   155     If there is some buffering in play that might mean data is only returned
   166     from the generator when the buffer is full. If buffer is bigger than a
   156     from the generator when the buffer is full. If buffer is bigger than a
   167     tweet, this will happen. Default buffer size in the part of socket lib
   157     tweet, this will happen. Default buffer size in the part of socket lib
   168     that enables readline is 8k. Max tweet length is around 3k."""
   158     that enables readline is 8k. Max tweet length is around 3k."""
   173         # need to yield a bunch here so we're sure we'll return from the
   163         # need to yield a bunch here so we're sure we'll return from the
   174         # blocking call in case the buffering bug is present.
   164         # blocking call in case the buffering bug is present.
   175         for n in xrange(100):
   165         for n in xrange(100):
   176             yield single_tweet+"\n"
   166             yield single_tweet+"\n"
   177 
   167 
   178     def do_test(klass, *args):
       
   179         with test_server(handler=tweetsource, methods=("post", "get"),
       
   180                          port="random") as server:
       
   181             stream = klass("foo", "bar", *args, url=server.baseurl)
       
   182 
   168 
   183             start = time.time()
   169     with test_server(handler=tweetsource, methods=("post", "get"), port="random") as server:
   184             stream.next()
   170         stream = cls("foo", "bar", *args, url=server.baseurl)
   185             first = time.time()
   171         start = time.time()
   186             diff = first - start
   172         stream.next()
   187             assert diff < 1, "Getting first tweet took more than a second!"
   173         first = time.time()
       
   174         diff = first - start
       
   175         assert diff < 1, "Getting first tweet took more than a second!"
   188 
   176 
   189     do_test(TweetStream)
       
   190     do_test(FollowStream, [1, 2, 3])
       
   191     do_test(TrackStream, ["foo", "bar"])
       
   192 
       
   193