227 ts_copy["user_id"] = ts_copy["user"].id |
227 ts_copy["user_id"] = ts_copy["user"].id |
228 ts_copy["original_json"] = self.json_txt |
228 ts_copy["original_json"] = self.json_txt |
229 |
229 |
230 self.tweet = Tweet(**ts_copy) |
230 self.tweet = Tweet(**ts_copy) |
231 self.session.add(self.tweet) |
231 self.session.add(self.tweet) |
232 self.session.flush() |
|
233 |
232 |
234 # get entities |
233 # get entities |
235 for ind_type, entity_list in self.json_dict["entities"].items(): |
234 if "entities" in self.json_dict: |
236 for ind in entity_list: |
235 for ind_type, entity_list in self.json_dict["entities"].items(): |
237 self.__process_entity(ind, ind_type) |
236 for ind in entity_list: |
|
237 self.__process_entity(ind, ind_type) |
|
238 else: |
|
239 extractor = twitter_text.Extractor(self.tweet.text) |
|
240 |
|
241 for ind in extractor.extract_hashtags_with_indices(): |
|
242 self.__process_entity(ind, "hashtags") |
|
243 |
|
244 for ind in extractor.extract_mentioned_screen_names_with_indices(): |
|
245 self.__process_entity(ind, "user_mentions") |
|
246 |
|
247 for ind in extractor.extract_urls_with_indices(): |
|
248 self.__process_entity(ind, "urls") |
|
249 |
|
250 self.session.flush() |
238 |
251 |
239 |
252 |
240 def __process_twitter_rest(self): |
253 def __process_twitter_rest(self): |
241 tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() |
254 tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() |
242 if tweet_nb > 0: |
255 if tweet_nb > 0: |