Adapt recorder_stream to python 3
Improve twitter authentication management
Use Oauth2 where possible
Delete old script
#from models import setup_database
import argparse
import codecs
import json
import logging
import re
import sys
import twitter
from iri_tweet.models import Tweet, TweetLog, TweetSource, setup_database
from iri_tweet.processor import TwitterProcessorStatus
from iri_tweet.utils import get_oauth_token, show_progress
logger = logging.getLogger(__name__)
def get_option():
parser = argparse.ArgumentParser(description='Merge tweets databases')
parser.add_argument("-l", "--log", dest="logfile",
help="log to file", metavar="LOG", default="stderr")
parser.add_argument("-v", dest="verbose", action="count",
help="verbose", default=0)
parser.add_argument("-k", "--key", dest="consumer_key",
help="Twitter consumer key", metavar="CONSUMER_KEY")
parser.add_argument("-s", "--secret", dest="consumer_secret",
help="Twitter consumer secret", metavar="CONSUMER_SECRET")
parser.add_argument("-q", dest="quiet", action="count",
help="quiet", default=0)
parser.add_argument("--query-user", dest="query_user", action="store_true",
help="Query twitter for user information", default=False)
parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token",
help="Token file name")
parser.add_argument("source", action="store", nargs=1, type=str, metavar="SOURCE")
parser.add_argument("target", action="store", nargs=1, type=str, metavar="TARGET")
return parser.parse_args()
if __name__ == "__main__":
#sys.stdout = codecs.getwriter(sys.stdout.encoding)(sys.stdout)
writer = None
options = get_option()
twitter_auth = None
if options.query_user:
acess_token_key, access_token_secret = get_oauth_token(consumer_key=options.consumer_key, consumer_secret=options.consumer_secret, token_file_path=options.token_filename)
twitter_auth = twitter.OAuth(acess_token_key, access_token_secret, options.consumer_key, options.consumer_secret)
#open source
src_conn_str = options.source[0].strip()
if not re.match(r"^\w+://.+", src_conn_str):
src_conn_str = 'sqlite:///' + src_conn_str
tgt_conn_str = options.target[0].strip()
if not re.match(r"^\w+://.+", tgt_conn_str):
tgt_conn_str = 'sqlite:///' + tgt_conn_str
engine_src, metadata_src, Session_src = setup_database(src_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
engine_tgt, metadata_tgt, Session_tgt = setup_database(tgt_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False)
conn_src = conn_tgt = session_src = session_tgt = None
try:
#conn_src = engine_src.connect()
#conn_tgt = engine_tgt.connect()
session_src = Session_src()
session_tgt = Session_tgt()
count_tw = session_src.query(Tweet).count()
if count_tw == 0:
print("No tweet to process : exit")
sys.exit()
query_src = session_src.query(Tweet).join(TweetSource).yield_per(100)
added = 0
for i,tweet in enumerate(query_src):
tweet_count = session_tgt.query(Tweet).filter(Tweet.id == tweet.id).count()
progress_text = u"Process: "
if tweet_count == 0:
added += 1
progress_text = u"Adding : "
tweet_source = tweet.tweet_source.original_json
tweet_obj = json.loads(tweet_source)
if 'text' not in tweet_obj:
tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET'])
session_tgt.add(tweet_log)
else:
tp = TwitterProcessorStatus(None, tweet_source, None, session_tgt, twitter_auth=twitter_auth, user_query_twitter=options.query_user, logger=logger)
tp.process()
session_tgt.flush()
ptext = progress_text + tweet.text
writer = show_progress(i+1, count_tw, ptext.replace("\n",""), 70, writer)
session_tgt.commit()
print(u"%d new tweet added" % (added,))
finally:
if session_tgt is not None:
session_tgt.close()
if session_src is not None:
session_src.close()
if conn_tgt is not None:
conn_tgt.close()
if conn_src is not None:
conn_src.close()