--- a/script/utils/merge_tweets.py Wed Jan 02 17:49:19 2019 +0100
+++ b/script/utils/merge_tweets.py Thu Jan 10 18:36:36 2019 +0100
@@ -5,6 +5,7 @@
import logging
import re
import sys
+import twitter
from iri_tweet.models import Tweet, TweetLog, TweetSource, setup_database
from iri_tweet.processor import TwitterProcessorStatus
@@ -13,7 +14,7 @@
logger = logging.getLogger(__name__)
def get_option():
-
+
parser = argparse.ArgumentParser(description='Merge tweets databases')
parser.add_argument("-l", "--log", dest="logfile",
@@ -31,23 +32,24 @@
parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
help="Token file name")
-
+
parser.add_argument("source", action="store", nargs=1, type=str, metavar="SOURCE")
parser.add_argument("target", action="store", nargs=1, type=str, metavar="TARGET")
-
+
return parser.parse_args()
if __name__ == "__main__":
-
+
#sys.stdout = codecs.getwriter(sys.stdout.encoding)(sys.stdout)
writer = None
options = get_option()
-
- access_token = None
+
+ twitter_auth = None
if options.query_user:
- access_token = get_oauth_token(options.consumer_key, options.consumer_secret, options.token_filename)
-
+ acess_token_key, access_token_secret = get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
+ twitter_auth = twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret)
+
#open source
src_conn_str = options.source[0].strip()
if not re.match(r"^\w+://.+", src_conn_str):
@@ -58,51 +60,51 @@
engine_src, metadata_src, Session_src = setup_database(src_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
- engine_tgt, metadata_tgt, Session_tgt = setup_database(tgt_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
+ engine_tgt, metadata_tgt, Session_tgt = setup_database(tgt_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
conn_src = conn_tgt = session_src = session_tgt = None
-
+
try:
#conn_src = engine_src.connect()
#conn_tgt = engine_tgt.connect()
session_src = Session_src()
session_tgt = Session_tgt()
-
+
count_tw = session_src.query(Tweet).count()
-
+
if count_tw == 0:
print("No tweet to process : exit")
sys.exit()
-
+
query_src = session_src.query(Tweet).join(TweetSource).yield_per(100)
added = 0
-
+
for i,tweet in enumerate(query_src):
-
+
tweet_count = session_tgt.query(Tweet).filter(Tweet.id == tweet.id).count()
-
+
progress_text = u"Process: "
if tweet_count == 0:
added += 1
progress_text = u"Adding : "
tweet_source = tweet.tweet_source.original_json
-
+
tweet_obj = json.loads(tweet_source)
if 'text' not in tweet_obj:
tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET'])
session_tgt.add(tweet_log)
- else:
- tp = TwitterProcessorStatus(None, tweet_source, None, session_tgt, access_token, options.token_filename, user_query_twitter=options.query_user, logger=logger)
+ else:
+ tp = TwitterProcessorStatus(None, tweet_source, None, session_tgt, twitter_auth=twitter_auth, user_query_twitter=options.query_user, logger=logger)
tp.process()
-
+
session_tgt.flush()
-
+
ptext = progress_text + tweet.text
writer = show_progress(i+1, count_tw, ptext.replace("\n",""), 70, writer)
-
+
session_tgt.commit()
print(u"%d new tweet added" % (added,))
-
+
finally:
if session_tgt is not None:
session_tgt.close()