83 help="Replace tweet ensemble", metavar="REPLACE", default=False) |
83 help="Replace tweet ensemble", metavar="REPLACE", default=False) |
84 parser.add_option("-L", "--list-conf", dest="listconf", |
84 parser.add_option("-L", "--list-conf", dest="listconf", |
85 help="list of file to process", metavar="LIST_CONF", default=None) |
85 help="list of file to process", metavar="LIST_CONF", default=None) |
86 parser.add_option("-E", "--extended", dest="extended_mode", action="store_true", |
86 parser.add_option("-E", "--extended", dest="extended_mode", action="store_true", |
87 help="Trigger polemic extended mode", metavar="EXTENDED", default=False) |
87 help="Trigger polemic extended mode", metavar="EXTENDED", default=False) |
|
88 parser.add_option("--user-whitelist", dest="user_whitelist", action="store", |
|
89 help="A list of user screen name", metavar="USER_WHITELIST",default=None) |
88 |
90 |
89 |
91 |
90 set_logging_options(parser) |
92 set_logging_options(parser) |
91 |
93 |
92 |
94 |
121 if options.exclude and os.path.exists(options.exclude): |
123 if options.exclude and os.path.exists(options.exclude): |
122 with open(options.exclude, 'r+') as f: |
124 with open(options.exclude, 'r+') as f: |
123 tei = tweet_exclude_table.insert() |
125 tei = tweet_exclude_table.insert() |
124 for line in f: |
126 for line in f: |
125 conn.execute(tei.values(id=long(line.strip()))) |
127 conn.execute(tei.values(id=long(line.strip()))) |
126 |
128 user_whitelist_file = options.user_whitelist |
|
129 user_whitelist = None |
|
130 |
127 if options.listconf: |
131 if options.listconf: |
128 |
132 |
129 parameters = [] |
133 parameters = [] |
130 confdoc = etree.parse(options.listconf) |
134 confdoc = etree.parse(options.listconf) |
131 for node in confdoc.xpath("/twitter_export/file"): |
135 for node in confdoc.xpath("/twitter_export/file"): |
160 start_date_str = params.get("start_date",None) |
164 start_date_str = params.get("start_date",None) |
161 end_date_str = params.get("end_date", None) |
165 end_date_str = params.get("end_date", None) |
162 duration = params.get("duration", None) |
166 duration = params.get("duration", None) |
163 content_file = params.get("content_file", None) |
167 content_file = params.get("content_file", None) |
164 hashtags = params.get('hashtags', []) |
168 hashtags = params.get('hashtags', []) |
165 |
169 |
|
170 if user_whitelist_file: |
|
171 with open(user_whitelist_file, 'r+') as f: |
|
172 user_whitelist = list(set([s.strip() for s in f])) |
166 |
173 |
167 start_date = parse_date(start_date_str) |
174 start_date = parse_date(start_date_str) |
168 ts = time.mktime(start_date.timetuple()) |
175 ts = time.mktime(start_date.timetuple()) |
169 |
176 |
170 if end_date_str: |
177 if end_date_str: |
172 te = time.mktime(end_date.timetuple()) |
179 te = time.mktime(end_date.timetuple()) |
173 else: |
180 else: |
174 te = ts + duration |
181 te = ts + duration |
175 end_date = start_date + datetime.timedelta(seconds=duration) |
182 end_date = start_date + datetime.timedelta(seconds=duration) |
176 |
183 |
177 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table) |
184 query = get_filter_query(session, start_date, end_date, hashtags, tweet_exclude_table, user_whitelist) |
178 |
185 |
179 query_res = query.all() |
186 query_res = query.all() |
180 |
187 |
181 root = None |
188 root = None |
182 ensemble_parent = None |
189 ensemble_parent = None |