2 # coding=utf-8 |
2 # coding=utf-8 |
3 |
3 |
4 from lxml import etree |
4 from lxml import etree |
5 from iri_tweet.models import setup_database, Tweet, User |
5 from iri_tweet.models import setup_database, Tweet, User |
6 from optparse import OptionParser #@UnresolvedImport |
6 from optparse import OptionParser #@UnresolvedImport |
7 from sqlalchemy import Table, Column, BigInteger |
7 from sqlalchemy import Table, Column, BigInteger, event, bindparam |
|
8 from sqlalchemy.sql import select, func |
8 from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, |
9 from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, |
9 get_logger) |
10 get_logger) |
10 import anyjson |
11 import anyjson |
11 import datetime |
12 import datetime |
12 import requests |
13 import requests |
24 # def __repr__(self): |
25 # def __repr__(self): |
25 # return "<TweetExclude(id=%d)>" % (self.id) |
26 # return "<TweetExclude(id=%d)>" % (self.id) |
26 |
27 |
27 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/" |
28 LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/" |
28 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" |
29 LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/" |
29 |
30 |
|
def re_fn(expr, item):
    """
    Case-insensitive regular expression test, used as the SQLite ``regexp`` function.

    expr: the regular expression pattern.
    item: the value to search in.

    Return True if expr matches anywhere in item, False otherwise.
    A None operand (e.g. a SQL NULL column value, which can come from an
    outer join) never matches; without this guard the re module would raise
    TypeError and abort the running query.
    """
    if expr is None or item is None:
        return False

    matched = re.compile(expr, re.I).search(item) is not None
    if matched:
        # only successful matches are traced, to keep the debug log readable
        get_logger().debug("re_fn : " + repr(expr) + "~" + repr(item)) #@UndefinedVariable
    return matched
30 |
37 |
31 def parse_polemics(tw, extended_mode): |
38 def parse_polemics(tw, extended_mode): |
32 """ |
39 """ |
33 parse polemics in text and return a list of polemic code. None if not polemic found |
40 parse polemics in text and return a list of polemic code. None if not polemic found |
34 """ |
41 """ |
123 conn_str = 'sqlite:///' + conn_str |
130 conn_str = 'sqlite:///' + conn_str |
124 |
131 |
125 engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
132 engine, metadata, Session = setup_database(conn_str, echo=((options.verbose-options.quiet)>0), create_all = False) |
126 conn = None |
133 conn = None |
127 try : |
134 try : |
128 conn = engine.connect() |
135 conn = engine.connect() |
|
136 @event.listens_for(conn, "begin") |
|
137 def do_begin(conn): |
|
138 conn.connection.create_function('regexp', 2, re_fn) |
129 session = None |
139 session = None |
130 try : |
140 try : |
131 session = Session(bind=conn) |
141 session = Session(bind=conn) |
132 tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
142 tweet_exclude_table = Table("tweet_exclude", metadata, Column('id', BigInteger, primary_key=True), prefixes=['TEMPORARY']) |
133 #mapper(TweetExclude, tweet_exclude_table) |
143 #mapper(TweetExclude, tweet_exclude_table) |
144 conn.execute(tei.values(id=res.group('value'))) |
154 conn.execute(tei.values(id=res.group('value'))) |
145 else: |
155 else: |
146 exclude_query = session.query(Tweet) |
156 exclude_query = session.query(Tweet) |
147 filter_obj = Tweet |
157 filter_obj = Tweet |
148 filter_field = res.group('field') |
158 filter_field = res.group('field') |
149 if filter_field.startswith("user_"): |
159 if filter_field.startswith("user__"): |
150 exclude_query = exclude_query.join(User) |
160 exclude_query = exclude_query.outerjoin(User, Tweet.user_id==User.id) |
151 filter_obj = User |
161 filter_obj = User |
152 filter_field = filter_field[len("user_"):] |
162 filter_field = filter_field[len("user__"):] |
153 |
|
154 |
163 |
155 if res.group('op') == "=": |
164 if res.group('op') == "=": |
156 exclude_query = session.query(Tweet).filter(getattr(filter_obj, filter_field) == res.group('value')) |
165 exclude_query = exclude_query.filter(getattr(filter_obj, filter_field) == res.group('value')) |
157 else: |
166 else: |
158 exclude_query = session.query(Tweet).filter(getattr(filter_obj, filter_field).like(res.group('value'))) |
167 exclude_query = exclude_query.filter(getattr(filter_obj, filter_field).op('regexp')(res.group('value'))) |
159 |
168 |
|
169 test_query = select([func.count()]).where(tweet_exclude_table.c.id==bindparam('t_id')) |
160 for t in exclude_query.all(): |
170 for t in exclude_query.all(): |
161 conn.execute(tei.values(id=t.id)) |
171 get_logger().debug("t : " + repr(t)) |
|
172 if conn.execute(test_query, t_id=t.id).fetchone()[0] == 0: |
|
173 conn.execute(tei.values(id=t.id)) |
162 |
174 |
163 user_whitelist_file = options.user_whitelist |
175 user_whitelist_file = options.user_whitelist |
164 user_whitelist = None |
176 user_whitelist = None |
165 |
177 |
166 if options.listconf: |
178 if options.listconf: |
173 if snode.tag == "path": |
185 if snode.tag == "path": |
174 params['content_file'] = snode.text |
186 params['content_file'] = snode.text |
175 params['content_file_write'] = snode.text |
187 params['content_file_write'] = snode.text |
176 elif snode.tag == "project_id": |
188 elif snode.tag == "project_id": |
177 params['content_file'] = options.base_url + LDT_PROJECT_REST_API_PATH + snode.text + "/?format=json" |
189 params['content_file'] = options.base_url + LDT_PROJECT_REST_API_PATH + snode.text + "/?format=json" |
|
190 params['content_file_write'] = options.base_url + LDT_PROJECT_REST_API_PATH + snode.text + "/?format=json" |
178 params['project_id'] = snode.text |
191 params['project_id'] = snode.text |
179 elif snode.tag == "start_date": |
192 elif snode.tag == "start_date": |
180 params['start_date'] = snode.text |
193 params['start_date'] = snode.text |
181 elif snode.tag == "end_date": |
194 elif snode.tag == "end_date": |
182 params['end_date'] = snode.text |
195 params['end_date'] = snode.text |
235 if content_file and content_file.find("http") == 0: |
248 if content_file and content_file.find("http") == 0: |
236 |
249 |
237 get_logger().debug("url : " + content_file) #@UndefinedVariable |
250 get_logger().debug("url : " + content_file) #@UndefinedVariable |
238 |
251 |
239 r = requests.get(content_file, params=post_param) |
252 r = requests.get(content_file, params=post_param) |
240 get_logger().debug("url response " + repr(r) + " content " + repr(r.text)) #@UndefinedVariable |
253 #get_logger().debug("url response " + repr(r) + " content " + repr(r.text)) #@UndefinedVariable |
241 project = r.json() |
254 project = r.json() |
242 root = etree.fromstring(project["ldt"]) |
255 text_match = re.match(r"\<\?\s*xml.*?\?\>(.*)", project['ldt'], re.I|re.S) |
|
256 root = etree.fromstring(text_match.group(1) if text_match else project['ldt']) |
243 |
257 |
244 elif content_file and os.path.exists(content_file): |
258 elif content_file and os.path.exists(content_file): |
245 |
259 |
246 doc = etree.parse(content_file) |
260 doc = etree.parse(content_file) |
247 root = doc.getroot() |
261 root = doc.getroot() |