diff -r ee6305b4a7dc -r cdd7d3c0549c script/lib/iri_tweet/utils.py --- a/script/lib/iri_tweet/utils.py Tue Jul 26 23:57:09 2011 +0200 +++ b/script/lib/iri_tweet/utils.py Wed Jul 27 00:04:55 2011 +0200 @@ -1,6 +1,6 @@ -from models import Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, \ - EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, \ - ACCESS_TOKEN_SECRET, adapt_date, adapt_json +from models import (Tweet, User, Hashtag, EntityHashtag, EntityUser, Url, + EntityUrl, CONSUMER_KEY, CONSUMER_SECRET, APPLICATION_NAME, ACCESS_TOKEN_KEY, + ACCESS_TOKEN_SECRET, adapt_date, adapt_json, TweetSource, TweetLog) from sqlalchemy.sql import select, or_ #@UnresolvedImport import anyjson #@UnresolvedImport import datetime @@ -83,7 +83,7 @@ class TwitterProcessor(object): - def __init__(self, json_dict, json_txt, session, token_filename=None): + def __init__(self, json_dict, json_txt, source_id, session, token_filename=None): if json_dict is None and json_txt is None: raise TwitterProcessorException("No json") @@ -101,6 +101,7 @@ if "id" not in self.json_dict: raise TwitterProcessorException("No id in json") + self.source_id = source_id self.session = session self.token_filename = token_filename @@ -225,7 +226,8 @@ else: ts_copy["user"] = user ts_copy["user_id"] = ts_copy["user"].id - ts_copy["original_json"] = self.json_txt + + ts_copy["tweet_source_id"] = self.source_id self.tweet = Tweet(**ts_copy) self.session.add(self.tweet) @@ -241,7 +243,8 @@ tweet_nb = self.session.query(Tweet).filter(Tweet.id == self.json_dict["id"]).count() if tweet_nb > 0: return - + + tweet_fields = { 'created_at': self.json_dict["created_at"], 'favorited': False, @@ -253,8 +256,8 @@ #'place': ts["place"], 'source': self.json_dict["source"], 'text': self.json_dict["text"], - 'truncated': False, - 'original_json' : self.json_txt, + 'truncated': False, + 'tweet_source_id' : self.source_id, } #user @@ -295,10 +298,23 @@ def process(self): - if "metadata" in self.json_dict: - self.__process_twitter_rest() - else: - self.__process_twitter_stream() + + if self.source_id is None: + tweet_source = TweetSource(original_json=self.json_txt); + self.session.add(tweet_source) + self.session.flush() + self.source_id = tweet_source.id + + try: + if "metadata" in self.json_dict: + self.__process_twitter_rest() + else: + self.__process_twitter_stream() + + tweet_log = TweetLog(tweet_source_id = self.source_id, status = TweetLog.TWEET_STATUS['OK']) + except: + + raise def set_logging(options):