1 #!/usr/bin/env python |
1 #!/usr/bin/env python |
2 # coding=utf-8 |
2 # coding=utf-8 |
3 |
3 |
4 from lxml import etree |
4 from lxml import etree |
5 from models import * |
5 from optparse import OptionParser #@UnresolvedImport |
6 from optparse import OptionParser |
6 from sqlalchemy import Table, Column, BigInteger, MetaData |
7 from sqlalchemy import Table, Column, Integer, BigInteger, String, MetaData, \ |
7 from sqlalchemy.orm import sessionmaker |
8 ForeignKey |
8 from utils import parse_date, set_logging_options, set_logging, get_filter_query |
9 from sqlalchemy.orm import sessionmaker, mapper |
9 from models import setup_database |
10 from sqlalchemy.sql import select, or_ |
|
11 from utils import * |
|
12 import datetime |
10 import datetime |
13 import email.utils |
|
14 import logging |
11 import logging |
15 import os |
|
16 import os.path |
12 import os.path |
17 import re |
|
18 import re |
13 import re |
19 import sys |
14 import sys |
20 import time |
15 import time |
21 import uuid |
16 import uuid #@UnresolvedImport |
22 import httplib2 |
17 import httplib2 |
23 import anyjson |
18 import anyjson |
24 import StringIO |
|
25 |
19 |
26 #class TweetExclude(object): |
20 #class TweetExclude(object): |
27 # def __init__(self, id): |
21 # def __init__(self, id): |
28 # self.id = id |
22 # self.id = id |
29 # |
23 # |
30 # def __repr__(self): |
24 # def __repr__(self): |
31 # return "<TweetExclude(id=%d)>" % (self.id) |
25 # return "<TweetExclude(id=%d)>" % (self.id) |
32 |
26 |
def parse_date(date_str):
    """Parse an RFC 2822 style date string into a naive ``datetime``.

    The string is decoded with ``email.utils.parsedate_tz``. Only the
    calendar and clock fields (year, month, day, hour, minute, second)
    are used; the timezone offset reported by the parser is ignored, so
    the returned value carries the wall-clock time exactly as written in
    ``date_str`` and has no ``tzinfo``.

    :param date_str: date string, e.g. ``"Wed, 03 Jan 2001 10:20:30 +0000"``.
    :return: a ``datetime.datetime`` with microsecond set to 0.
    :raises TypeError: if ``date_str`` cannot be parsed
        (``parsedate_tz`` returns ``None`` in that case).
    """
    ts = email.utils.parsedate_tz(date_str)
    # Take exactly the six date/time fields. Index 6 of the parsed tuple is
    # the weekday slot, which the standard library documents as unusable;
    # it must not be forwarded as the ``microsecond`` argument.
    return datetime.datetime(*ts[0:6])
|
36 |
27 |
37 def parse_polemics(tw, extended_mode): |
28 def parse_polemics(tw, extended_mode): |
38 """ |
29 """ |
39 parse polemics in text and return a list of polemic code. None if not polemic found |
30 parse polemics in text and return a list of polemic code. None if not polemic found |
40 """ |
31 """ |
162 'hashtags' : options.hashtag |
153 'hashtags' : options.hashtag |
163 }] |
154 }] |
164 |
155 |
165 for params in parameters: |
156 for params in parameters: |
166 |
157 |
167 logging.debug("PARAMETERS " + repr(params)) |
158 logging.debug("PARAMETERS " + repr(params)) #@UndefinedVariable |
168 |
159 |
169 start_date_str = params.get("start_date",None) |
160 start_date_str = params.get("start_date",None) |
170 end_date_str = params.get("end_date", None) |
161 end_date_str = params.get("end_date", None) |
171 duration = params.get("duration", None) |
162 duration = params.get("duration", None) |
172 content_file = params.get("content_file", None) |
163 content_file = params.get("content_file", None) |
192 |
183 |
193 #to do : analyse situation ldt or iri ? filename set or not ? |
184 #to do : analyse situation ldt or iri ? filename set or not ? |
194 |
185 |
195 if content_file and content_file.find("http") == 0: |
186 if content_file and content_file.find("http") == 0: |
196 |
187 |
197 logging.debug("url : " + content_file) |
188 logging.debug("url : " + content_file) #@UndefinedVariable |
198 |
189 |
199 h = httplib2.Http() |
190 h = httplib2.Http() |
200 resp, content = h.request(content_file) |
191 resp, content = h.request(content_file) |
201 |
192 |
202 logging.debug("url response " + repr(resp) + " content " + repr(content)) |
193 logging.debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable |
203 |
194 |
204 project = anyjson.deserialize(content) |
195 project = anyjson.deserialize(content) |
205 root = etree.fromstring(project["ldt"]) |
196 root = etree.fromstring(project["ldt"]) |
206 |
197 |
207 elif content_file and os.path.exists(content_file): |
198 elif content_file and os.path.exists(content_file): |
212 |
203 |
213 if root is None: |
204 if root is None: |
214 |
205 |
215 root = etree.Element(u"iri") |
206 root = etree.Element(u"iri") |
216 |
207 |
217 project = etree.SubElement(root, u"project", {u"abstract":u"Twitter comments on ENMI",u"title":u"Twitter comments on ENMI 2009", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
208 project = etree.SubElement(root, u"project", {u"abstract":u"Polemics Tweets",u"title":u"Polemic Tweets", u"user":u"IRI Web", u"id":unicode(uuid.uuid4())}) |
218 |
209 |
219 medias = etree.SubElement(root, u"medias") |
210 medias = etree.SubElement(root, u"medias") |
220 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
211 media = etree.SubElement(medias, u"media", {u"pict":u"", u"src":unicode(options.content), u"video":unicode(options.video), u"id":unicode(options.content_id), u"extra":u""}) |
221 |
212 |
222 annotations = etree.SubElement(root, u"annotations") |
213 annotations = etree.SubElement(root, u"annotations") |
254 ensembles_node = etree.SubElement(body_node, u"ensembles") |
245 ensembles_node = etree.SubElement(body_node, u"ensembles") |
255 ensemble_parent = ensembles_node |
246 ensemble_parent = ensembles_node |
256 |
247 |
257 |
248 |
258 if ensemble_parent is None: |
249 if ensemble_parent is None: |
259 logging.error("Can not process file") |
250 logging.error("Can not process file") #@UndefinedVariable |
260 sys.exit() |
251 sys.exit() |
261 |
252 |
262 if options.replace: |
253 if options.replace: |
263 for ens in ensemble_parent.iterchildren(tag=u"ensemble"): |
254 for ens in ensemble_parent.iterchildren(tag=u"ensemble"): |
264 if ens.get("id","").startswith("tweet_"): |
255 if ens.get("id","").startswith("tweet_"): |
309 |
300 |
310 if content_file and content_file.find("http") == 0: |
301 if content_file and content_file.find("http") == 0: |
311 |
302 |
312 project["ldt"] = output_data |
303 project["ldt"] = output_data |
313 body = anyjson.serialize(project) |
304 body = anyjson.serialize(project) |
314 logging.debug("write http " + content_file) |
305 logging.debug("write http " + content_file) #@UndefinedVariable |
315 logging.debug("write http " + repr(body)) |
306 logging.debug("write http " + repr(body)) #@UndefinedVariable |
316 h = httplib2.Http() |
307 h = httplib2.Http() |
317 resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body) |
308 resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body) |
318 logging.debug("write http " + repr(resp) + " content " + content) |
309 logging.debug("write http " + repr(resp) + " content " + content) #@UndefinedVariable |
319 else: |
310 else: |
320 if content_file and os.path.exists(content_file): |
311 if content_file and os.path.exists(content_file): |
321 dest_file_name = content_file |
312 dest_file_name = content_file |
322 else: |
313 else: |
323 dest_file_name = options.filename |
314 dest_file_name = options.filename |
324 |
315 |
325 logging.debug("WRITE : " + dest_file_name) |
316 logging.debug("WRITE : " + dest_file_name) #@UndefinedVariable |
326 output = open(dest_file_name, "w") |
317 output = open(dest_file_name, "w") |
327 output.write(output_data) |
318 output.write(output_data) |
328 output.flush() |
319 output.flush() |
329 output.close() |
320 output.close() |
330 |
321 |