1 #from models import setup_database |
1 #from models import setup_database |
2 from iri_tweet.models import setup_database, TweetSource, Tweet, TweetLog |
|
3 from iri_tweet.processor import TwitterProcessorStatus |
|
4 from iri_tweet.utils import get_oauth_token, show_progress |
|
5 import anyjson |
|
6 import argparse |
2 import argparse |
7 import codecs |
3 import codecs |
|
4 import json |
8 import logging |
5 import logging |
9 import re |
6 import re |
10 import sys |
7 import sys |
|
8 |
|
9 from iri_tweet.models import Tweet, TweetLog, TweetSource, setup_database |
|
10 from iri_tweet.processor import TwitterProcessorStatus |
|
11 from iri_tweet.utils import get_oauth_token, show_progress |
11 |
12 |
12 logger = logging.getLogger(__name__) |
13 logger = logging.getLogger(__name__) |
13 |
14 |
14 def get_option(): |
15 def get_option(): |
15 |
16 |
47 if options.query_user: |
48 if options.query_user: |
48 access_token = get_oauth_token(options.consumer_key, options.consumer_secret, options.token_filename) |
49 access_token = get_oauth_token(options.consumer_key, options.consumer_secret, options.token_filename) |
49 |
50 |
50 #open source |
51 #open source |
51 src_conn_str = options.source[0].strip() |
52 src_conn_str = options.source[0].strip() |
52 if not re.match("^\w+://.+", src_conn_str): |
53 if not re.match(r"^\w+://.+", src_conn_str): |
53 src_conn_str = 'sqlite:///' + src_conn_str |
54 src_conn_str = 'sqlite:///' + src_conn_str |
54 tgt_conn_str = options.target[0].strip() |
55 tgt_conn_str = options.target[0].strip() |
55 if not re.match("^\w+://.+", tgt_conn_str): |
56 if not re.match(r"^\w+://.+", tgt_conn_str): |
56 tgt_conn_str = 'sqlite:///' + tgt_conn_str |
57 tgt_conn_str = 'sqlite:///' + tgt_conn_str |
57 |
58 |
58 |
59 |
59 engine_src, metadata_src, Session_src = setup_database(src_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
60 engine_src, metadata_src, Session_src = setup_database(src_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
60 engine_tgt, metadata_tgt, Session_tgt = setup_database(tgt_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
61 engine_tgt, metadata_tgt, Session_tgt = setup_database(tgt_conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
64 try: |
65 try: |
65 #conn_src = engine_src.connect() |
66 #conn_src = engine_src.connect() |
66 #conn_tgt = engine_tgt.connect() |
67 #conn_tgt = engine_tgt.connect() |
67 session_src = Session_src() |
68 session_src = Session_src() |
68 session_tgt = Session_tgt() |
69 session_tgt = Session_tgt() |
69 |
70 |
70 count_tw_query = Tweet.__table__.count() # @UndefinedVariable |
71 count_tw = session_src.query(Tweet).count() |
71 |
|
72 count_tw = engine_src.scalar(count_tw_query) |
|
73 |
72 |
74 if count_tw == 0: |
73 if count_tw == 0: |
75 print "No tweet to process : exit" |
74 print("No tweet to process : exit") |
76 sys.exit() |
75 sys.exit() |
77 |
76 |
78 query_src = session_src.query(Tweet).join(TweetSource).yield_per(100) |
77 query_src = session_src.query(Tweet).join(TweetSource).yield_per(100) |
79 added = 0 |
78 added = 0 |
80 |
79 |
86 if tweet_count == 0: |
85 if tweet_count == 0: |
87 added += 1 |
86 added += 1 |
88 progress_text = u"Adding : " |
87 progress_text = u"Adding : " |
89 tweet_source = tweet.tweet_source.original_json |
88 tweet_source = tweet.tweet_source.original_json |
90 |
89 |
91 tweet_obj = anyjson.deserialize(tweet_source) |
90 tweet_obj = json.loads(tweet_source) |
92 if 'text' not in tweet_obj: |
91 if 'text' not in tweet_obj: |
93 tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET']) |
92 tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET']) |
94 session_tgt.add(tweet_log) |
93 session_tgt.add(tweet_log) |
95 else: |
94 else: |
96 tp = TwitterProcessorStatus(None, tweet_source, None, session_tgt, access_token, options.token_filename, user_query_twitter=options.query_user, logger=logger) |
95 tp = TwitterProcessorStatus(None, tweet_source, None, session_tgt, access_token, options.token_filename, user_query_twitter=options.query_user, logger=logger) |
100 |
99 |
101 ptext = progress_text + tweet.text |
100 ptext = progress_text + tweet.text |
102 writer = show_progress(i+1, count_tw, ptext.replace("\n",""), 70, writer) |
101 writer = show_progress(i+1, count_tw, ptext.replace("\n",""), 70, writer) |
103 |
102 |
104 session_tgt.commit() |
103 session_tgt.commit() |
105 print u"%d new tweet added" % (added) |
104 print(u"%d new tweet added" % (added,)) |
106 |
105 |
107 finally: |
106 finally: |
108 if session_tgt is not None: |
107 if session_tgt is not None: |
109 session_tgt.close() |
108 session_tgt.close() |
110 if session_src is not None: |
109 if session_src is not None: |
111 session_src.close() |
110 session_src.close() |
112 if conn_tgt is not None: |
111 if conn_tgt is not None: |
113 conn_tgt.close() |
112 conn_tgt.close() |
114 if conn_src is not None: |
113 if conn_src is not None: |
115 conn_src.close() |
114 conn_src.close() |
116 |
|
117 |
|