1 #from models import setup_database |
1 #from models import setup_database |
2 from iri_tweet.models import setup_database, TweetSource, Tweet, TweetLog |
2 from iri_tweet.models import setup_database, TweetSource, Tweet, TweetLog |
3 from iri_tweet.utils import TwitterProcessor, get_oauth_token, show_progress |
3 from iri_tweet.processor import TwitterProcessorStatus |
|
4 from iri_tweet.utils import get_oauth_token, show_progress |
|
5 import anyjson |
4 import argparse |
6 import argparse |
|
7 import codecs |
|
8 import logging |
|
9 import re |
5 import sys |
10 import sys |
6 import re |
11 |
7 import anyjson |
12 logger = logging.getLogger(__name__) |
8 import math |
|
9 import codecs |
|
10 |
13 |
11 def get_option(): |
14 def get_option(): |
12 |
15 |
13 parser = argparse.ArgumentParser(description='Merge tweets databases') |
16 parser = argparse.ArgumentParser(description='Merge tweets databases') |
14 |
17 |
15 parser.add_argument("-l", "--log", dest="logfile", |
18 parser.add_argument("-l", "--log", dest="logfile", |
16 help="log to file", metavar="LOG", default="stderr") |
19 help="log to file", metavar="LOG", default="stderr") |
17 parser.add_argument("-v", dest="verbose", action="count", |
20 parser.add_argument("-v", dest="verbose", action="count", |
18 help="verbose", default=0) |
21 help="verbose", default=0) |
|
22 parser.add_option("-k", "--key", dest="consumer_key", |
|
23 help="Twitter consumer key", metavar="CONSUMER_KEY") |
|
24 parser.add_option("-s", "--secret", dest="consumer_secret", |
|
25 help="Twitter consumer secret", metavar="CONSUMER_SECRET") |
19 parser.add_argument("-q", dest="quiet", action="count", |
26 parser.add_argument("-q", dest="quiet", action="count", |
20 help="quiet", default=0) |
27 help="quiet", default=0) |
21 parser.add_argument("--query-user", dest="query_user", action="store_true", |
28 parser.add_argument("--query-user", dest="query_user", action="store_true", |
22 help="Query twitter for user information", default=False) |
29 help="Query twitter for user information", default=False) |
23 parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token", |
30 parser.add_argument("-t", dest="token_filename", metavar="TOKEN_FILENAME", default=".oauth_token", |
36 |
43 |
37 options = get_option() |
44 options = get_option() |
38 |
45 |
39 access_token = None |
46 access_token = None |
40 if options.query_user: |
47 if options.query_user: |
41 access_token = get_oauth_token(options.token_filename) |
48 access_token = get_oauth_token(options.consumer_key, options.consumer_secret, options.token_filename) |
42 |
49 |
43 #open source |
50 #open source |
44 src_conn_str = options.source[0].strip() |
51 src_conn_str = options.source[0].strip() |
45 if not re.match("^\w+://.+", src_conn_str): |
52 if not re.match("^\w+://.+", src_conn_str): |
46 src_conn_str = 'sqlite:///' + src_conn_str |
53 src_conn_str = 'sqlite:///' + src_conn_str |
58 #conn_src = engine_src.connect() |
65 #conn_src = engine_src.connect() |
59 #conn_tgt = engine_tgt.connect() |
66 #conn_tgt = engine_tgt.connect() |
60 session_src = Session_src() |
67 session_src = Session_src() |
61 session_tgt = Session_tgt() |
68 session_tgt = Session_tgt() |
62 |
69 |
63 count_tw_query = Tweet.__table__.count() |
70 count_tw_query = Tweet.__table__.count() # @UndefinedVariable |
64 |
71 |
65 count_tw = engine_src.scalar(count_tw_query) |
72 count_tw = engine_src.scalar(count_tw_query) |
66 |
73 |
67 if count_tw == 0: |
74 if count_tw == 0: |
68 print "No tweet to process : exit" |
75 print "No tweet to process : exit" |
81 progress_text = u"Adding : " |
88 progress_text = u"Adding : " |
82 tweet_source = tweet.tweet_source.original_json |
89 tweet_source = tweet.tweet_source.original_json |
83 |
90 |
84 tweet_obj = anyjson.deserialize(tweet_source) |
91 tweet_obj = anyjson.deserialize(tweet_source) |
85 if 'text' not in tweet_obj: |
92 if 'text' not in tweet_obj: |
86 tweet_log = TweetLog(tweet_source_id=source_id, status=TweetLog.TWEET_STATUS['NOT_TWEET']) |
93 tweet_log = TweetLog(tweet_source_id=tweet.tweet_source.id, status=TweetLog.TWEET_STATUS['NOT_TWEET']) |
87 session_tgt.add(tweet_log) |
94 session_tgt.add(tweet_log) |
88 else: |
95 else: |
89 tp = TwitterProcessor(None, tweet_source, None, session_tgt, access_token, options.token_filename, user_query_twitter=options.query_user) |
96 tp = TwitterProcessorStatus(None, tweet_source, None, session_tgt, access_token, options.token_filename, user_query_twitter=options.query_user, logger=logger) |
90 tp.process() |
97 tp.process() |
91 |
98 |
92 session_tgt.flush() |
99 session_tgt.flush() |
93 |
100 |
94 ptext = progress_text + tweet.text |
101 ptext = progress_text + tweet.text |