diff -r 514e0ee0c68a -r e6b328970ee8 script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Fri Jun 24 23:41:52 2011 +0200 +++ b/script/lib/iri_tweet/utils.py Wed Jul 27 12:24:43 2011 +0200 @@ -229,12 +229,25 @@ self.tweet = Tweet(**ts_copy) self.session.add(self.tweet) - self.session.flush() # get entities - for ind_type, entity_list in self.json_dict["entities"].items(): - for ind in entity_list: - self.__process_entity(ind, ind_type) + if "entities" in self.json_dict: + for ind_type, entity_list in self.json_dict["entities"].items(): + for ind in entity_list: + self.__process_entity(ind, ind_type) + else: + extractor = twitter_text.Extractor(self.tweet.text) + + for ind in extractor.extract_hashtags_with_indices(): + self.__process_entity(ind, "hashtags") + + for ind in extractor.extract_mentioned_screen_names_with_indices(): + self.__process_entity(ind, "user_mentions") + + for ind in extractor.extract_urls_with_indices(): + self.__process_entity(ind, "urls") + + self.session.flush() def __process_twitter_rest(self):