31 # return "<TweetExclude(id=%d)>" % (self.id) |
31 # return "<TweetExclude(id=%d)>" % (self.id) |
32 |
32 |
def parse_date(date_str):
    """
    Parse an RFC 2822 style date string into a naive ``datetime.datetime``.

    The string is parsed with ``email.utils.parsedate_tz``. Only the six
    calendar fields (year, month, day, hour, minute, second) are used: the
    timezone offset reported by the parser is ignored, so the returned value
    carries the wall-clock time exactly as written in ``date_str``.

    :param date_str: the date string to parse
    :return: a naive ``datetime.datetime``
    :raises ValueError: if ``date_str`` cannot be parsed
    """
    ts = email.utils.parsedate_tz(date_str)
    if ts is None:
        # parsedate_tz signals failure by returning None; fail with a clear
        # message instead of an opaque error while slicing None.
        raise ValueError("unparseable date string: %r" % (date_str,))
    # Slots 6-8 of the parsed tuple are documented as unusable, so do not
    # forward slot 6 as the microsecond argument.
    return datetime.datetime(*ts[:6])
|
36 |
|
# Polemic markers recognised in tweet text. Raw string so that the escapes are
# regex escapes rather than (invalid) string escapes.
_POLEMIC_MARKER_RE = re.compile(r"\+\+|--|\?\?|==")

# Marker found in the text -> polemic code written to the export.
_POLEMIC_CODES = {
    '++': u'OK',
    '--': u'KO',
    '??': u'Q',
    '==': u'REF',
}


def parse_polemics(tw, extended_mode):
    """
    Parse the polemic markers of a tweet and return the list of polemic codes.

    The markers ``++``, ``--``, ``??`` and ``==`` found in ``tw.text`` map to
    the codes ``OK``, ``KO``, ``Q`` and ``REF`` respectively.

    When ``extended_mode`` is true, two more rules apply:

    * any ``?`` in the text adds ``Q``;
    * any entity of ``tw.entity_list`` whose ``type`` is ``"entity_url"``
      adds ``REF``.

    :param tw: object exposing ``text`` and (in extended mode) ``entity_list``
    :param extended_mode: enable the extended detection rules
    :return: list of distinct polemic codes in first-seen order, or ``None``
        if no polemic was found
    """
    found = []

    def add(code):
        # Each code is reported once, however many times it is triggered.
        if code not in found:
            found.append(code)

    for match in _POLEMIC_MARKER_RE.finditer(tw.text):
        add(_POLEMIC_CODES[match.group(0)])

    if extended_mode:
        if "?" in tw.text:
            add(u"Q")
        if any(entity.type == "entity_url" for entity in tw.entity_list):
            add(u"REF")

    # Callers test the result for truthiness, so keep None for "nothing found".
    return found or None
36 |
62 |
37 def get_options(): |
63 def get_options(): |
38 parser = OptionParser() |
64 parser = OptionParser() |
39 parser.add_option("-f", "--file", dest="filename", |
65 parser.add_option("-f", "--file", dest="filename", |
40 help="write export to file", metavar="FILE", default="project_enmi.ldt") |
66 help="write export to file", metavar="FILE", default="project_enmi.ldt") |
64 help="Cutting name", metavar="NAME", default=u"Tweets") |
90 help="Cutting name", metavar="NAME", default=u"Tweets") |
65 parser.add_option("-R", "--replace", dest="replace", action="store_true", |
91 parser.add_option("-R", "--replace", dest="replace", action="store_true", |
66 help="Replace tweet ensemble", metavar="REPLACE", default=False) |
92 help="Replace tweet ensemble", metavar="REPLACE", default=False) |
67 parser.add_option("-L", "--list-conf", dest="listconf", |
93 parser.add_option("-L", "--list-conf", dest="listconf", |
68 help="list of file to process", metavar="LIST_CONF", default=None) |
94 help="list of file to process", metavar="LIST_CONF", default=None) |
|
95 parser.add_option("-E", "--extended", dest="extended_mode", action="store_true", |
|
96 help="Trigger polemic extended mode", metavar="EXTENDED", default=False) |
|
97 |
69 |
98 |
70 set_logging_options(parser) |
99 set_logging_options(parser) |
71 |
100 |
72 |
101 |
73 return parser.parse_args() |
102 return parser.parse_args() |
252 if entity.type == u'entity_hashtag': |
281 if entity.type == u'entity_hashtag': |
253 etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
282 etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
254 |
283 |
255 meta_element = etree.SubElement(element, u'meta') |
284 meta_element = etree.SubElement(element, u'meta') |
256 |
285 |
257 polemics_element = etree.Element(u'polemics') |
286 polemics_list = parse_polemics(tw, options.extended_mode) |
258 polemic_added = False |
287 if polemics_list: |
259 for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text): |
288 polemics_element = etree.Element(u'polemics') |
260 polemic_added = True |
289 for pol in polemics_list: |
261 pol_link = { |
290 etree.SubElement(polemics_element, u'polemic').text = pol |
262 '++' : u'OK', |
|
263 '--' : u'KO', |
|
264 '??' : u'Q', |
|
265 '==' : u'REF'}[m.group(1)] |
|
266 etree.SubElement(polemics_element, u'polemic').text = pol_link |
|
267 if polemic_added: |
|
268 meta_element.append(polemics_element) |
291 meta_element.append(polemics_element) |
269 |
292 |
270 etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json)) |
293 etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json)) |
271 |
294 |
272 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True) |
295 output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True) |
273 |
296 |
274 if content_file and content_file.find("http") == 0: |
297 if content_file and content_file.find("http") == 0: |