script/lib/iri_tweet/iri_tweet/processor.py
changeset 915 c428119e8399
parent 891 8628c590f608
child 957 e4d0094f097b
equal deleted inserted replaced
914:43876221071f 915:c428119e8399
   230         self.logger.debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
   230         self.logger.debug("Process_entity entity_dict: " + repr(entity_dict)) #@UndefinedVariable
   231         if entity_klass:
   231         if entity_klass:
   232             self.obj_buffer.add_object(entity_klass, None, entity_dict, False)
   232             self.obj_buffer.add_object(entity_klass, None, entity_dict, False)
   233 
   233 
   234 
   234 
   235     def __process_twitter_stream(self):
   235     def __process_twitter(self):
   236         
   236         
   237         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
   237         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
   238         if tweet_nb > 0:
   238         if tweet_nb > 0:
   239             return
   239             return
   240         
   240         
   271             for ind in extractor.extract_urls_with_indices():
   271             for ind in extractor.extract_urls_with_indices():
   272                 self.__process_entity(ind, "urls")
   272                 self.__process_entity(ind, "urls")
   273             
   273             
   274             for ind in extractor.extract_mentioned_screen_names_with_indices():
   274             for ind in extractor.extract_mentioned_screen_names_with_indices():
   275                 self.__process_entity(ind, "user_mentions")
   275                 self.__process_entity(ind, "user_mentions")
   276 
       
   277     def __process_twitter_rest(self):
       
   278         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
       
   279         if tweet_nb > 0:
       
   280             return
       
   281         
       
   282         
       
   283         tweet_fields = {
       
   284             'created_at': self.json_dict["created_at"], 
       
   285             'favorited': False,
       
   286             'id': self.json_dict["id"],
       
   287             'id_str': self.json_dict["id_str"],
       
   288             #'in_reply_to_screen_name': ts["to_user"], 
       
   289             'in_reply_to_user_id': self.json_dict.get("in_reply_to_user_id",None),
       
   290             'in_reply_to_user_id_str': self.json_dict.get("in_reply_to_user_id_str", None),
       
   291             #'place': ts["place"],
       
   292             'source': self.json_dict["source"],
       
   293             'text': self.json_dict["text"],
       
   294             'truncated': False,            
       
   295             'tweet_source_id' : self.source_id,
       
   296         }
       
   297         
       
   298         #user
       
   299     
       
   300         user_fields = {
       
   301             'lang' : self.json_dict.get('iso_language_code',None),
       
   302             'profile_image_url' : self.json_dict["profile_image_url"],
       
   303             'screen_name' : self.json_dict["from_user"],
       
   304             'id' : self.json_dict["from_user_id"],
       
   305             'id_str' : self.json_dict["from_user_id_str"],
       
   306             'name' : self.json_dict['from_user_name'],
       
   307         }
       
   308         
       
   309         user = self.__get_user(user_fields, do_merge=False)
       
   310         if user is None:
       
   311             self.logger.warning("USER not found " + repr(user_fields)) #@UndefinedVariable
       
   312             tweet_fields["user_id"] = None
       
   313         else:
       
   314             tweet_fields["user_id"] = user.id
       
   315         
       
   316         tweet_fields = adapt_fields(tweet_fields, fields_adapter["rest"]["tweet"])
       
   317         self.tweet = self.obj_buffer.add_object(Tweet, None, tweet_fields, True)
       
   318                 
       
   319         self.__process_entities()
       
   320 
   276 
   321 
   277 
   322 
   278 
   323     def process_source(self):
   279     def process_source(self):
   324                 
   280                 
   326         log = self.session.query(TweetLog).filter(TweetLog.status_id==status_id).first()
   282         log = self.session.query(TweetLog).filter(TweetLog.status_id==status_id).first()
   327         if(log):
   283         if(log):
   328             self.obj_buffer.add_object(TweetLog, log, {'status': TweetLog.TWEET_STATUS['DELETE'], 'status_id': None})
   284             self.obj_buffer.add_object(TweetLog, log, {'status': TweetLog.TWEET_STATUS['DELETE'], 'status_id': None})
   329             self.session.query(TweetSource).filter(TweetSource.id==self.source_id).delete()
   285             self.session.query(TweetSource).filter(TweetSource.id==self.source_id).delete()
   330         else:
   286         else:
   331             if "metadata" in self.json_dict:
   287             self.__process_twitter()
   332                 self.__process_twitter_rest()
       
   333             else:
       
   334                 self.__process_twitter_stream()
       
   335 
   288 
   336         self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)
   289         self.obj_buffer.add_object(TweetLog, None, {'tweet_source_id':self.source_id, 'status':TweetLog.TWEET_STATUS['OK']}, True)
   337 
   290 
   338     def log_info(self):
   291     def log_info(self):
   339         screen_name = self.json_dict.get("user",{}).get("screen_name","")
   292         screen_name = self.json_dict.get("user",{}).get("screen_name","")