treat entities
author Yves-Marie Haussonne <1218002+ymph@users.noreply.github.com>
Wed, 27 Jul 2011 12:24:43 +0200
changeset 247 e6b328970ee8
parent 199 514e0ee0c68a
child 248 ffb0a6d08000
treat entities
script/lib/iri_tweet/utils.py
--- a/script/lib/iri_tweet/utils.py	Fri Jun 24 23:41:52 2011 +0200
+++ b/script/lib/iri_tweet/utils.py	Wed Jul 27 12:24:43 2011 +0200
@@ -229,12 +229,25 @@
         
         self.tweet = Tweet(**ts_copy)
         self.session.add(self.tweet)
-        self.session.flush()
             
         # get entities
-        for ind_type, entity_list in self.json_dict["entities"].items():
-            for ind in entity_list:
-                self.__process_entity(ind, ind_type)
+        if "entities" in self.json_dict:
+            for ind_type, entity_list in self.json_dict["entities"].items():
+                for ind in entity_list:
+                    self.__process_entity(ind, ind_type)
+        else:
+            extractor = twitter_text.Extractor(self.tweet.text)
+    
+            for ind in extractor.extract_hashtags_with_indices():
+                self.__process_entity(ind, "hashtags")
+    
+            for ind in extractor.extract_mentioned_screen_names_with_indices():
+                self.__process_entity(ind, "user_mentions")
+    
+            for ind in extractor.extract_urls_with_indices():
+                self.__process_entity(ind, "urls")
+
+        self.session.flush()
 
 
     def __process_twitter_rest(self):