# HG changeset patch # User Raphael Velt # Date 1370598602 -7200 # Node ID 0646fa4949fad2ee1e214895c9d3320a1e08a5b5 # Parent 68e577cd07faf430b1cc10b330c7368434319647# Parent c47f290a001f486400f0abe5c3f1ad96d55d17d7 Merge with 9274ffa6c20ab647dce42d51c49440e149b306a5 diff -r 68e577cd07fa -r 0646fa4949fa script/lib/iri_tweet/iri_tweet/processor.py --- a/script/lib/iri_tweet/iri_tweet/processor.py Fri Jun 07 11:48:10 2013 +0200 +++ b/script/lib/iri_tweet/iri_tweet/processor.py Fri Jun 07 11:50:02 2013 +0200 @@ -232,7 +232,7 @@ self.obj_buffer.add_object(entity_klass, None, entity_dict, False) - def __process_twitter_stream(self): + def __process_twitter(self): tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() if tweet_nb > 0: @@ -274,50 +274,6 @@ for ind in extractor.extract_mentioned_screen_names_with_indices(): self.__process_entity(ind, "user_mentions") - def __process_twitter_rest(self): - tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() - if tweet_nb > 0: - return - - - tweet_fields = { - 'created_at': self.json_dict["created_at"], - 'favorited': False, - 'id': self.json_dict["id"], - 'id_str': self.json_dict["id_str"], - #'in_reply_to_screen_name': ts["to_user"], - 'in_reply_to_user_id': self.json_dict.get("in_reply_to_user_id",None), - 'in_reply_to_user_id_str': self.json_dict.get("in_reply_to_user_id_str", None), - #'place': ts["place"], - 'source': self.json_dict["source"], - 'text': self.json_dict["text"], - 'truncated': False, - 'tweet_source_id' : self.source_id, - } - - #user - - user_fields = { - 'lang' : self.json_dict.get('iso_language_code',None), - 'profile_image_url' : self.json_dict["profile_image_url"], - 'screen_name' : self.json_dict["from_user"], - 'id' : self.json_dict["from_user_id"], - 'id_str' : self.json_dict["from_user_id_str"], - 'name' : self.json_dict['from_user_name'], - } - - user = self.__get_user(user_fields, do_merge=False) - if user is None: - self.logger.warning("USER not found " + repr(user_fields)) #@UndefinedVariable - tweet_fields["user_id"] = None - else: - tweet_fields["user_id"] = user.id - - tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"]) - self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True) - - self.__process_entities() - def process_source(self): @@ -328,10 +284,7 @@ self.obj_buffer.add_object(TweetLog, log, {'status': TweetLog.TWEET_STATUS['DELETE'], 'status_id': None}) self.session.query(TweetSource).filter(TweetSource.id==self.source_id).delete() else: - if "metadata" in self.json_dict: - self.__process_twitter_rest() - else: - self.__process_twitter_stream() + self.__process_twitter() self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True) diff -r 68e577cd07fa -r 0646fa4949fa script/rest/search_twitter.py --- a/script/rest/search_twitter.py Fri Jun 07 11:48:10 2013 +0200 +++ b/script/rest/search_twitter.py Fri Jun 07 11:50:02 2013 +0200 @@ -1,10 +1,10 @@ -from iri_tweet import models, processor +from iri_tweet import models, processor, utils from optparse import OptionParser import anyjson import re +import sys import twitter - def get_option(): parser = OptionParser() @@ -21,16 +21,27 @@ help="Result per page") parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token", help="Token file name") + parser.add_option("-k", "--key", dest="consumer_key", + help="Twitter consumer key", metavar="CONSUMER_KEY") + parser.add_option("-s", "--secret", dest="consumer_secret", + help="Twitter consumer secret", metavar="CONSUMER_SECRET") + return parser.parse_args() - return parser.parse_args() +def get_auth(options, access_token): + consumer_key = options.consumer_key + consumer_secret = options.consumer_secret + auth = twitter.OAuth(token=access_token[0], token_secret=access_token[1], consumer_key=consumer_key, consumer_secret=consumer_secret) + return auth if __name__ == "__main__": (options, args) = get_option() + + access_token = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename) + auth = get_auth(options, access_token) - t = twitter.Twitter(domain="search.twitter.com") - t.secure = False + t = twitter.Twitter(domain="api.twitter.com",api_version="1.1",secure=True, auth=auth) conn_str = args[0].strip() if not re.match("^\w+://.+", conn_str): @@ -49,16 +60,26 @@ results = None page = 1 print options.query + + #get current_maxid + results = t.search.tweets(q=options.query, result_type="recent") + max_id = results.get('search_metadata',{}).get('max_id',0) + if max_id==0: + print("No results, exit") + sys.exit(0) - while page <= int(1500/int(options.rpp)) and ( results is None or len(results) > 0): - results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True) + while page <= int(1500/int(options.rpp)) and \ + ( results is None or len(results.get('statuses',0)) > 0) and \ + max_id > 0: + results = t.search.tweets(q=options.query, count=options.rpp, max_id=max_id, include_entities=True, result_type='recent') + max_id = results.get('search_metadata',{}).get('since_id',1) - 1 - for tweet in results["results"]: + for tweet in results["statuses"]: print tweet tweet_str = anyjson.serialize(tweet) #invalidate user id - processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None) - processor.process() + p = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=(options.consumer_key, options.consumer_secret), access_token=access_token, token_filename=options.token_filename, user_query_twitter=False, logger=None) + p.process() session.flush() session.commit() page += 1 diff -r 68e577cd07fa -r 0646fa4949fa web/common.php --- a/web/common.php Fri Jun 07 11:48:10 2013 +0200 +++ b/web/common.php Fri Jun 07 11:50:02 2013 +0200 @@ -17,7 +17,7 @@ $ldt_platform = 'http://ldt.iri.centrepompidou.fr/'; $project_url_base = 'ldtplatform/ldt/cljson/id/'; -$C_default_rep = 'bpi-floptechno'; +$C_default_rep = 'fens2013'; $C_feedback_form_url = 'https://spreadsheets.google.com/spreadsheet/viewform?hl=en_US&formkey=dDZILVdXVHRzd0xhWGVZXzkweHN2RGc6MQ#gid=0'; $archives_list = array(