diff -r efbda157eb57 -r 184372ec27e2 script/utils/merge_tweets.py --- a/script/utils/merge_tweets.py Fri Dec 21 12:33:01 2018 +0100 +++ b/script/utils/merge_tweets.py Wed Jan 02 17:49:19 2019 +0100 @@ -1,14 +1,15 @@ #from models import setup_database -from iri_tweet.models import setup_database, TweetSource, Tweet, TweetLog -from iri_tweet.processor import TwitterProcessorStatus -from iri_tweet.utils import get_oauth_token, show_progress -import anyjson import argparse import codecs +import json import logging import re import sys +from iri_tweet.models import Tweet, TweetLog, TweetSource, setup_database +from iri_tweet.processor import TwitterProcessorStatus +from iri_tweet.utils import get_oauth_token, show_progress + logger = logging.getLogger(__name__) def get_option(): @@ -49,10 +50,10 @@ #open source src_conn_str = options.source[0].strip() - if not re.match("^\w+://.+", src_conn_str): + if not re.match(r"^\w+://.+", src_conn_str): src_conn_str = 'sqlite:///' + src_conn_str tgt_conn_str = options.target[0].strip() - if not re.match("^\w+://.+", tgt_conn_str): + if not re.match(r"^\w+://.+", tgt_conn_str): tgt_conn_str = 'sqlite:///' + tgt_conn_str @@ -66,13 +67,11 @@ #conn_tgt = engine_tgt.connect() session_src = Session_src() session_tgt = Session_tgt() - - count_tw_query = Tweet.__table__.count() # @UndefinedVariable - - count_tw = engine_src.scalar(count_tw_query) + + count_tw = session_src.query(Tweet).count() if count_tw == 0: - print "No tweet to process : exit" + print("No tweet to process : exit") sys.exit() query_src = session_src.query(Tweet).join(TweetSource).yield_per(100) @@ -88,7 +87,7 @@ progress_text = u"Adding : " tweet_source = tweet.tweet_source.original_json - tweet_obj = anyjson.deserialize(tweet_source) + tweet_obj = json.loads(tweet_source) if 'text' not in tweet_obj: tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET']) session_tgt.add(tweet_log) @@ -102,7 +101,7 @@ writer = show_progress(i+1, count_tw, ptext.replace("\n",""), 70, writer) session_tgt.commit() - print u"%d new tweet added" % (added) + print(u"%d new tweet added" % (added,)) finally: if session_tgt is not None: @@ -113,5 +112,3 @@ conn_tgt.close() if conn_src is not None: conn_src.close() - - \ No newline at end of file