--- a/script/lib/iri_tweet/utils.py Fri Jun 24 23:41:52 2011 +0200
+++ b/script/lib/iri_tweet/utils.py Wed Jul 27 12:24:43 2011 +0200
@@ -229,12 +229,25 @@
self.tweet = Tweet(**ts_copy)
self.session.add(self.tweet)
- self.session.flush()
# get entities
- for ind_type, entity_list in self.json_dict["entities"].items():
- for ind in entity_list:
- self.__process_entity(ind, ind_type)
+ if "entities" in self.json_dict:
+ for ind_type, entity_list in self.json_dict["entities"].items():
+ for ind in entity_list:
+ self.__process_entity(ind, ind_type)
+ else:
+ extractor = twitter_text.Extractor(self.tweet.text)
+
+ for ind in extractor.extract_hashtags_with_indices():
+ self.__process_entity(ind, "hashtags")
+
+ for ind in extractor.extract_mentioned_screen_names_with_indices():
+ self.__process_entity(ind, "user_mentions")
+
+ for ind in extractor.extract_urls_with_indices():
+ self.__process_entity(ind, "urls")
+
+ self.session.flush()
def __process_twitter_rest(self):