# Mercurial changeset diff (e2e4d4097b95 -> 5d301c2ddb89) for
# script/rest/search_twitter.py.
#
# NOTE(review): the line breaks of this patch appear to have been collapsed
# into spaces, so hunk line boundaries and indentation cannot be recovered
# from this text alone; it presumably cannot be applied with patch/hg import
# as-is -- confirm against the original changeset.
#
# Changes visible in the hunks below:
#   * adds `import urlparse`;
#   * changes the default of the -P option (dest "rpp", "Result per page")
#     from "50" to "100";
#   * adds get_max_id(results), which reads
#     results['search_metadata']['next_results'], strips a leading "?",
#     parses the remainder as a query string and returns its first `max_id`
#     value as an int, or 0 when there is none;
#   * replaces the direct 'max_id' / 'since_id' lookups on search_metadata
#     with calls to get_max_id(), both after the initial search and inside
#     the paging loop, and joins the multi-line `while` condition onto a
#     single line.
#
# NOTE(review): in the added `while` line, results.get('statuses',0) falls
# back to the int 0, and len(0) raises TypeError -- a list default looks
# intended; confirm. The `results is None` test also looks unreachable there,
# since get_max_id(results) has already called results.get() -- confirm.
diff -r e2e4d4097b95 -r 5d301c2ddb89 script/rest/search_twitter.py --- a/script/rest/search_twitter.py Fri Dec 13 15:57:02 2013 +0100 +++ b/script/rest/search_twitter.py Tue Dec 17 17:49:01 2013 +0100 @@ -6,6 +6,7 @@ import twitter from iri_tweet import models, processor, utils +import urlparse def get_options(): @@ -18,7 +19,7 @@ help="write tweet to DATABASE. This is a connection string", metavar="CONNECTION_STR") parser.add_argument("-Q", dest="query", help="query", metavar="QUERY") - parser.add_argument("-P", dest="rpp", metavar="RPP", default="50", + parser.add_argument("-P", dest="rpp", metavar="RPP", default="100", help="Result per page") parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token", help="Token file name") @@ -37,6 +38,17 @@ auth = twitter.OAuth(token=access_token[0], token_secret=access_token[1], consumer_key=consumer_key, consumer_secret=consumer_secret) return auth +def get_max_id(results): + next_results = results.get('search_metadata',{}).get('next_results',''); + if next_results and next_results.startswith("?"): + next_results = next_results[1:] + + max_ids = urlparse.parse_qs(next_results).get('max_id',[]) + max_id = 0 + if max_ids: + max_id = int(max_ids[0]) + return max_id + if __name__ == "__main__": options = get_options() @@ -65,16 +77,17 @@ #get current_maxid results = t.search.tweets(q=options.query, result_type="recent") - max_id = results.get('search_metadata',{}).get('max_id',0) + max_id = get_max_id(results) if max_id==0: print("No results, exit") sys.exit(0) - while page <= int(1500/int(options.rpp)) and \ - ( results is None or len(results.get('statuses',0)) > 0) and \ - max_id > 0: + while page <= int(1500/int(options.rpp)) and ( results is None or len(results.get('statuses',0)) > 0) and max_id > 0: + results = t.search.tweets(q=options.query, count=options.rpp, max_id=max_id, include_entities=True, result_type='recent') - max_id = results.get('search_metadata',{}).get('since_id',1) - 
1 + + max_id = get_max_id(results) + for tweet in results["statuses"]: print tweet