Merge with 9274ffa6c20ab647dce42d51c49440e149b306a5

author:    Raphael Velt <raph.velt@gmail.com>
date:      Fri, 07 Jun 2013 11:50:02 +0200
changeset: 918:0646fa4949fa
parent:    913:68e577cd07fa (current diff)
parent:    917:c47f290a001f (diff)
child:     919:e126d3e1e186
files:     web/common.php
--- a/script/lib/iri_tweet/iri_tweet/processor.py	Fri Jun 07 11:48:10 2013 +0200
+++ b/script/lib/iri_tweet/iri_tweet/processor.py	Fri Jun 07 11:50:02 2013 +0200
@@ -232,7 +232,7 @@
             self.obj_buffer.add_object(entity_klass, None, entity_dict, False)
 
 
-    def __process_twitter_stream(self):
+    def __process_twitter(self):
         
         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
         if tweet_nb > 0:
@@ -274,50 +274,6 @@
             for ind in extractor.extract_mentioned_screen_names_with_indices():
                 self.__process_entity(ind, "user_mentions")
 
-    def __process_twitter_rest(self):
-        tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
-        if tweet_nb > 0:
-            return
-        
-        
-        tweet_fields = {
-            'created_at': self.json_dict["created_at"], 
-            'favorited': False,
-            'id': self.json_dict["id"],
-            'id_str': self.json_dict["id_str"],
-            #'in_reply_to_screen_name': ts["to_user"], 
-            'in_reply_to_user_id': self.json_dict.get("in_reply_to_user_id",None),
-            'in_reply_to_user_id_str': self.json_dict.get("in_reply_to_user_id_str", None),
-            #'place': ts["place"],
-            'source': self.json_dict["source"],
-            'text': self.json_dict["text"],
-            'truncated': False,            
-            'tweet_source_id' : self.source_id,
-        }
-        
-        #user
-    
-        user_fields = {
-            'lang' : self.json_dict.get('iso_language_code',None),
-            'profile_image_url' : self.json_dict["profile_image_url"],
-            'screen_name' : self.json_dict["from_user"],
-            'id' : self.json_dict["from_user_id"],
-            'id_str' : self.json_dict["from_user_id_str"],
-            'name' : self.json_dict['from_user_name'],
-        }
-        
-        user = self.__get_user(user_fields, do_merge=False)
-        if user is None:
-            self.logger.warning("USER not found " + repr(user_fields)) #@UndefinedVariable
-            tweet_fields["user_id"] = None
-        else:
-            tweet_fields["user_id"] = user.id
-        
-        tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
-        self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True)
-                
-        self.__process_entities()
-
 
 
     def process_source(self):
@@ -328,10 +284,7 @@
             self.obj_buffer.add_object(TweetLog, log, {'status': TweetLog.TWEET_STATUS['DELETE'], 'status_id': None})
             self.session.query(TweetSource).filter(TweetSource.id==self.source_id).delete()
         else:
-            if "metadata" in self.json_dict:
-                self.__process_twitter_rest()
-            else:
-                self.__process_twitter_stream()
+            self.__process_twitter()
 
         self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)
 
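The processor.py change above collapses the two ingestion paths into one. The dedicated __process_twitter_rest handler existed because the retired v1 Search API returned a flattened payload (from_user, from_user_id, iso_language_code, plus a "metadata" key that process_source branched on), whereas the v1.1 search endpoint returns statuses in the same shape as the streaming API, so a single __process_twitter path now covers both. The comparison below is illustrative only: the keys on the old side are taken from the removed code, and all values are made up.

# Old v1 search result, handled by the removed __process_twitter_rest
# (the presence of "metadata" selected this branch in process_source):
old_v1_search_result = {
    "metadata": {"result_type": "recent"},
    "id": 123456789012345678,
    "id_str": "123456789012345678",
    "text": "...",
    "source": "web",
    "from_user": "some_user",            # flattened user fields
    "from_user_id": 12345,
    "from_user_id_str": "12345",
    "from_user_name": "Some User",
    "iso_language_code": "fr",
    "profile_image_url": "http://...",
}

# A v1.1 search status has the same structure as a streaming status
# (nested "user" and "entities"), so the single __process_twitter suffices:
new_v11_status = {
    "id": 123456789012345678,
    "id_str": "123456789012345678",
    "text": "...",
    "source": "web",
    "entities": {"hashtags": [], "urls": [], "user_mentions": []},
    "user": {"id": 12345, "screen_name": "some_user", "name": "Some User"},
}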
--- a/script/rest/search_twitter.py	Fri Jun 07 11:48:10 2013 +0200
+++ b/script/rest/search_twitter.py	Fri Jun 07 11:50:02 2013 +0200
@@ -1,10 +1,10 @@
-from iri_tweet import models, processor
+from iri_tweet import models, processor, utils
 from optparse import OptionParser
 import anyjson
 import re
+import sys
 import twitter
 
-
 def get_option():
     
     parser = OptionParser()
@@ -21,16 +21,27 @@
                       help="Result per page")
     parser.add_option("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
                       help="Token file name")
+    parser.add_option("-k", "--key", dest="consumer_key",
+                        help="Twitter consumer key", metavar="CONSUMER_KEY")
+    parser.add_option("-s", "--secret", dest="consumer_secret",
+                        help="Twitter consumer secret", metavar="CONSUMER_SECRET")
     
+    return parser.parse_args()
 
-    return parser.parse_args()
+def get_auth(options, access_token):
+    consumer_key = options.consumer_key
+    consumer_secret = options.consumer_secret
+    auth = twitter.OAuth(token=access_token[0], token_secret=access_token[1], consumer_key=consumer_key, consumer_secret=consumer_secret)
+    return auth
 
 if __name__ == "__main__":
 
     (options, args) = get_option()
+    
+    access_token = utils.get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
+    auth = get_auth(options, access_token)
 
-    t = twitter.Twitter(domain="search.twitter.com")
-    t.secure = False
+    t = twitter.Twitter(domain="api.twitter.com",api_version="1.1",secure=True, auth=auth)
     
     conn_str = args[0].strip()
     if not re.match("^\w+://.+", conn_str):
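With the move to api.twitter.com and API version 1.1, every search request has to be signed, hence the new consumer key/secret options and the get_auth helper. utils.get_oauth_token comes from iri_tweet.utils and is assumed here to return an (access token, token secret) pair, either read back from the token file or obtained interactively. A minimal sketch of that assumed behaviour, using only primitives from the same python twitter library the script already imports (obtain_access_token is a hypothetical name, not the project's function):

import os
from twitter import oauth_dance, read_token_file

def obtain_access_token(consumer_key, consumer_secret, token_file=".oauth_token"):
    # reuse a previously cached token/secret pair if one exists
    if os.path.exists(token_file):
        return read_token_file(token_file)
    # otherwise run the interactive OAuth dance and cache the result in token_file
    return oauth_dance("iri_tweet", consumer_key, consumer_secret, token_file)

The pair it returns is what get_auth above feeds into twitter.OAuth as token and token_secret.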
@@ -49,16 +60,26 @@
         results = None        
         page = 1
         print options.query
+
+        #get current_maxid
+        results = t.search.tweets(q=options.query, result_type="recent")
+        max_id = results.get('search_metadata',{}).get('max_id',0)
+        if max_id==0:
+            print("No results, exit")
+            sys.exit(0)
         
-        while page <= int(1500/int(options.rpp)) and  ( results is None  or len(results) > 0):
-            results = t.search(q=options.query, rpp=options.rpp, page=page, include_entities=True)
+        while page <= int(1500/int(options.rpp)) and \
+            ( results is None  or len(results.get('statuses',0)) > 0) and \
+            max_id > 0:
+            results = t.search.tweets(q=options.query, count=options.rpp, max_id=max_id, include_entities=True, result_type='recent')
+            max_id = results.get('search_metadata',{}).get('since_id',1) - 1
             
-            for tweet in results["results"]:
+            for tweet in results["statuses"]:
                 print tweet
                 tweet_str = anyjson.serialize(tweet)
                 #invalidate user id
-                processor = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=None, access_token=None, token_filename=options.token_filename, user_query_twitter=False, logger=None)
-                processor.process()
+                p = processor.TwitterProcessorStatus(json_dict=tweet, json_txt=tweet_str, source_id=None, session=session, consumer_token=(options.consumer_key, options.consumer_secret), access_token=access_token, token_filename=options.token_filename, user_query_twitter=False, logger=None)
+                p.process()
                 session.flush()
                 session.commit()
             page += 1
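The rewritten loop pages backwards through the result set with max_id instead of the old rpp/page parameters, which the v1.1 endpoint no longer supports. Note that the hunk recomputes max_id from search_metadata['since_id']; the pattern documented by Twitter derives the next max_id from the lowest status id in the page just fetched. A minimal sketch of that documented pattern, reusing the t client built above (iterate_search is an illustrative name, not part of the script):

def iterate_search(t, query, count=100):
    # page backwards through recent results until an empty page comes back
    max_id = None
    while True:
        kwargs = dict(q=query, count=count, include_entities=True, result_type="recent")
        if max_id is not None:
            kwargs["max_id"] = max_id
        page = t.search.tweets(**kwargs)
        statuses = page.get("statuses", [])
        if not statuses:
            break
        for status in statuses:
            yield status
        # step below the oldest id already seen so the next page does not repeat it
        max_id = min(s["id"] for s in statuses) - 1

Each yielded status could then be handed to processor.TwitterProcessorStatus exactly as in the loop above.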
--- a/web/common.php	Fri Jun 07 11:48:10 2013 +0200
+++ b/web/common.php	Fri Jun 07 11:50:02 2013 +0200
@@ -17,7 +17,7 @@
 $ldt_platform = 'http://ldt.iri.centrepompidou.fr/';
 $project_url_base = 'ldtplatform/ldt/cljson/id/';
 
-$C_default_rep = 'bpi-floptechno';
+$C_default_rep = 'fens2013';
 $C_feedback_form_url = 'https://spreadsheets.google.com/spreadsheet/viewform?hl=en_US&formkey=dDZILVdXVHRzd0xhWGVZXzkweHN2RGc6MQ#gid=0';
 
 $archives_list = array(