script/lib/iri_tweet/utils.py
changeset 247 e6b328970ee8
parent 122 4c3a15877f80
child 248 ffb0a6d08000
equal deleted inserted replaced
199:514e0ee0c68a 247:e6b328970ee8
   227             ts_copy["user_id"] = ts_copy["user"].id
   227             ts_copy["user_id"] = ts_copy["user"].id
   228         ts_copy["original_json"] = self.json_txt
   228         ts_copy["original_json"] = self.json_txt
   229         
   229         
   230         self.tweet = Tweet(**ts_copy)
   230         self.tweet = Tweet(**ts_copy)
   231         self.session.add(self.tweet)
   231         self.session.add(self.tweet)
   232         self.session.flush()
       
   233             
   232             
   234         # get entities
   233         # get entities
   235         for ind_type, entity_list in self.json_dict["entities"].items():
   234         if "entities" in self.json_dict:
   236             for ind in entity_list:
   235             for ind_type, entity_list in self.json_dict["entities"].items():
   237                 self.__process_entity(ind, ind_type)
   236                 for ind in entity_list:
       
   237                     self.__process_entity(ind, ind_type)
       
   238         else:
       
   239             extractor = twitter_text.Extractor(self.tweet.text)
       
   240     
       
   241             for ind in extractor.extract_hashtags_with_indices():
       
   242                 self.__process_entity(ind, "hashtags")
       
   243     
       
   244             for ind in extractor.extract_mentioned_screen_names_with_indices():
       
   245                 self.__process_entity(ind, "user_mentions")
       
   246     
       
   247             for ind in extractor.extract_urls_with_indices():
       
   248                 self.__process_entity(ind, "urls")
       
   249 
       
   250         self.session.flush()
   238 
   251 
   239 
   252 
   240     def __process_twitter_rest(self):
   253     def __process_twitter_rest(self):
   241         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
   254         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
   242         if tweet_nb > 0:
   255         if tweet_nb > 0: