script/rest/search_twitter.py
changeset 1031 5d301c2ddb89
parent 987 18cb05f027a0
--- a/script/rest/search_twitter.py	Fri Dec 13 15:57:02 2013 +0100
+++ b/script/rest/search_twitter.py	Tue Dec 17 17:49:01 2013 +0100
@@ -6,6 +6,7 @@
 import twitter
 
 from iri_tweet import models, processor, utils
+import urlparse
 
 
 def get_options():
@@ -18,7 +19,7 @@
                         help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR")
     parser.add_argument("-Q", dest="query",
                       help="query", metavar="QUERY")
-    parser.add_argument("-P", dest="rpp", metavar="RPP", default="50",
+    parser.add_argument("-P", dest="rpp", metavar="RPP", default="100",
                       help="Result per page")
     parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                       help="Token file name")
@@ -37,6 +38,17 @@
     auth = twitter.OAuth(token=access_token[0], token_secret=access_token[1], consumer_key=consumer_key, consumer_secret=consumer_secret)
     return auth
 
+def get_max_id(results):  # Return the integer max_id found in results['search_metadata']['next_results'], or 0 when none is present.
+    next_results = results.get('search_metadata',{}).get('next_results','');  # query string for the next page; '' when either key is missing
+    if next_results and next_results.startswith("?"):
+        next_results = next_results[1:]  # drop the leading "?" so parse_qs sees bare key=value pairs
+    
+    max_ids = urlparse.parse_qs(next_results).get('max_id',[])  # parse_qs maps each key to a list of values; [] when max_id is absent
+    max_id = 0  # fallback returned when no max_id parameter was found
+    if max_ids:
+        max_id = int(max_ids[0])  # only the first max_id value is used; int() raises ValueError if it is not numeric
+    return max_id
+
 if __name__ == "__main__":
 
     options = get_options()
@@ -65,16 +77,17 @@
 
         #get current_maxid
         results = t.search.tweets(q=options.query, result_type="recent")
-        max_id = results.get('search_metadata',{}).get('max_id',0)
+        max_id = get_max_id(results)
         if max_id==0:
             print("No results, exit")
             sys.exit(0)
         
-        while page <= int(1500/int(options.rpp)) and \
-            ( results is None  or len(results.get('statuses',0)) > 0) and \
-            max_id > 0:
+        while page <= int(1500/int(options.rpp)) and ( results is None  or len(results.get('statuses',0)) > 0) and max_id > 0:
+            
             results = t.search.tweets(q=options.query, count=options.rpp, max_id=max_id, include_entities=True, result_type='recent')
-            max_id = results.get('search_metadata',{}).get('since_id',1) - 1
+            
+            max_id = get_max_id(results)
+
             
             for tweet in results["statuses"]:
                 print tweet