--- a/script/lib/iri_tweet/utils.py Tue Jul 26 23:57:09 2011 +0200
+++ b/script/lib/iri_tweet/utils.py Wed Jul 27 00:04:55 2011 +0200
@@ -1,6 +1,6 @@
-from models import Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, \
- EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, \
- ACCESS_TOKEN_SECRET, adapt_date, adapt_json
+from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url,
+ EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY,
+ ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog)
from sqlalchemy.sql import select, or_ #@UnresolvedImport
import anyjson #@UnresolvedImport
import datetime
@@ -83,7 +83,7 @@
class TwitterProcessor(object):
- def __init__(self, json_dict, json_txt, session, token_filename=None):
+ def __init__(self, json_dict, json_txt, source_id, session, token_filename=None):
if json_dict is None and json_txt is None:
raise TwitterProcessorException("No json")
@@ -101,6 +101,7 @@
if "id" not in self.json_dict:
raise TwitterProcessorException("No id in json")
+ self.source_id = source_id
self.session = session
self.token_filename = token_filename
@@ -225,7 +226,8 @@
else:
ts_copy["user"] = user
ts_copy["user_id"] = ts_copy["user"].id
- ts_copy["original_json"] = self.json_txt
+
+ ts_copy["tweet_source_id"] = self.source_id
self.tweet = Tweet(**ts_copy)
self.session.add(self.tweet)
@@ -241,7 +243,8 @@
tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count()
if tweet_nb > 0:
return
-
+
+
tweet_fields = {
'created_at': self.json_dict["created_at"],
'favorited': False,
@@ -253,8 +256,8 @@
#'place': ts["place"],
'source': self.json_dict["source"],
'text': self.json_dict["text"],
- 'truncated': False,
- 'original_json' : self.json_txt,
+ 'truncated': False,
+ 'tweet_source_id' : self.source_id,
}
#user
@@ -295,10 +298,23 @@
def process(self):
- if "metadata" in self.json_dict:
- self.__process_twitter_rest()
- else:
- self.__process_twitter_stream()
+
+ if self.source_id is None:
+ tweet_source = TweetSource(original_json=self.json_txt);
+ self.session.add(tweet_source)
+ self.session.flush()
+ self.source_id = tweet_source.id
+
+ try:
+ if "metadata" in self.json_dict:
+ self.__process_twitter_rest()
+ else:
+ self.__process_twitter_stream()
+
+ tweet_log = TweetLog(tweet_source_id = self.source_id, status = TweetLog.TWEET_STATUS['OK'])
+ except:
+
+ raise
def set_logging(options):