--- a/script/rest/search_twitter.py Fri Jun 07 11:48:10 2013 +0200
+++ b/script/rest/search_twitter.py Fri Jun 07 11:50:02 2013 +0200
@@ -1,10 +1,10 @@
-from iri_tweet import models, processor
+from iri_tweet import models, processor, utils
from optparse import OptionParser
import anyjson
import re
+import sys
import twitter
-
def get_option():
parser = OptionParser()
@@ -21,16 +21,27 @@
help="Result per page")
parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
help="Token file name")
+ parser.add_option("-k", "--key", dest="consumer_key",
+ help="Twitter consumer key", metavar="CONSUMER_KEY")
+ parser.add_option("-s", "--secret", dest="consumer_secret",
+ help="Twitter consumer secret", metavar="CONSUMER_SECRET")
+ return parser.parse_args()
- return parser.parse_args()
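+# Build a twitter.OAuth credential from the command-line consumer key/secret and the stored (token, token_secret) pair.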
+def get_auth(options, access_token):
+ consumer_key = options.consumer_key
+ consumer_secret = options.consumer_secret
+ auth = twitter.OAuth(token=access_token[0], token_secret=access_token[1], consumer_key=consumer_key, consumer_secret=consumer_secret)
+ return auth
if __name__ == "__main__":
(options, args) = get_option()
+
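+    # get_oauth_token is assumed to return the (token, token_secret) pair, presumably cached in the file given by -t.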
+ access_token = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
+ auth = get_auth(options, access_token)
- t = twitter.Twitter(domain="search.twitter.com")
- t.secure = False
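+    # Use the authenticated API 1.1 endpoint; the old unauthenticated search.twitter.com API has been retired.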
+    t = twitter.Twitter(domain="api.twitter.com", api_version="1.1", secure=True, auth=auth)
conn_str = args[0].strip()
if not re.match("^\w+://.+", conn_str):
@@ -49,16 +60,26 @@
results = None
page = 1
print options.query
+
+    # Probe the most recent results to get the current max_id that anchors pagination.
+    results = t.search.tweets(q=options.query, result_type="recent")
+    max_id = results.get('search_metadata', {}).get('max_id', 0)
+    if max_id == 0:
+        print("No results, exit")
+        sys.exit(0)
- while page <= int(1500/int(options.rpp)) and ( results is None or len(results) > 0):
- results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True)
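+    # Page through results in batches of options.rpp, walking max_id backwards until no statuses are returned (roughly 1500 tweets at most).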
+    while page <= int(1500/int(options.rpp)) and \
+          (results is None or len(results.get('statuses', [])) > 0) and \
+          max_id > 0:
+ results = t.search.tweets(q=options.query, count=options.rpp, max_id=max_id, include_entities=True, result_type='recent')
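+        # Lower max_id so the next request returns older tweets.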
+        statuses = results.get('statuses', [])
+        max_id = (min(s['id'] for s in statuses) - 1) if statuses else 0
- for tweet in results["results"]:
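+        # API 1.1 returns tweets under 'statuses'; the old search API used 'results'.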
+ for tweet in results["statuses"]:
print tweet
tweet_str = anyjson.serialize(tweet)
#invalidate user id
- processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None)
- processor.process()
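+            # Pass the real consumer and access tokens to the processor instead of None.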
+ p = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=(options.consumer_key, options.consumer_secret), access_token=access_token, token_filename=options.token_filename, user_query_twitter=False, logger=None)
+ p.process()
session.flush()
session.commit()
page += 1