script/lib/iri_tweet/utils.py
changeset 242 cdd7d3c0549c
parent 203 8124cde38141
child 243 9213a63fa34a
--- a/script/lib/iri_tweet/utils.py	Tue Jul 26 23:57:09 2011 +0200
+++ b/script/lib/iri_tweet/utils.py	Wed Jul 27 00:04:55 2011 +0200
@@ -1,6 +1,6 @@
-from models import Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, \
-    EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, \
-    ACCESS_TOKEN_SECRET, adapt_date, adapt_json
+from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, 
+    EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, 
+    ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog)
 from sqlalchemy.sql import select, or_ #@UnresolvedImport
 import anyjson #@UnresolvedImport
 import datetime
@@ -83,7 +83,7 @@
 
 class TwitterProcessor(object):
     
-    def __init__(self, json_dict, json_txt, session, token_filename=None):
+    def __init__(self, json_dict, json_txt, source_id, session, token_filename=None):
 
         if json_dict is None and json_txt is None:
             raise TwitterProcessorException("No json")
@@ -101,6 +101,7 @@
         if "id" not in self.json_dict:
             raise TwitterProcessorException("No id in json")
         
+        self.source_id = source_id
         self.session = session
         self.token_filename = token_filename
 
@@ -225,7 +226,8 @@
         else:
             ts_copy["user"] = user
             ts_copy["user_id"] = ts_copy["user"].id
-        ts_copy["original_json"] = self.json_txt
+            
+        ts_copy["tweet_source_id"] = self.source_id
         
         self.tweet = Tweet(**ts_copy)
         self.session.add(self.tweet)
@@ -241,7 +243,8 @@
         tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
         if tweet_nb > 0:
             return
-            
+        
+        
         tweet_fields = {
             'created_at': self.json_dict["created_at"], 
             'favorited': False,
@@ -253,8 +256,8 @@
             #'place': ts["place"],
             'source': self.json_dict["source"],
             'text': self.json_dict["text"],
-            'truncated': False,
-            'original_json' : self.json_txt,
+            'truncated': False,            
+            'tweet_source_id' : self.source_id,
         }
         
         #user
@@ -295,10 +298,23 @@
 
 
     def process(self):
-        if "metadata" in self.json_dict:
-            self.__process_twitter_rest()
-        else:
-            self.__process_twitter_stream()
+        
+        if self.source_id is None:
+            tweet_source = TweetSource(original_json=self.json_txt);
+            self.session.add(tweet_source)
+            self.session.flush()
+            self.source_id = tweet_source.id
+        
+        try:
+            if "metadata" in self.json_dict:
+                self.__process_twitter_rest()
+            else:
+                self.__process_twitter_stream()
+                
+            tweet_log = TweetLog(tweet_source_id = self.source_id, status = TweetLog.TWEET_STATUS['OK'])
+        except:
+            
+            raise
         
 
 def set_logging(options):