--- a/script/lib/iri_tweet/export_twitter_alchemy.py Mon Jan 31 11:47:45 2011 +0100
+++ b/script/lib/iri_tweet/export_twitter_alchemy.py Tue Feb 01 18:49:43 2011 +0100
@@ -34,6 +34,32 @@
ts = email.utils.parsedate_tz(date_str)
return datetime.datetime(*ts[0:7])
+def parse_polemics(tw, extended_mode):
+ """
+ Parse polemic markers in the tweet text and return a list of polemic codes (OK, KO, Q, REF), or None if no polemic is found.
+ """
+ polemics = {}
+ for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text):
+ pol_link = {
+ '++' : u'OK',
+ '--' : u'KO',
+ '??' : u'Q',
+ '==' : u'REF'}[m.group(1)]
+ polemics[pol_link] = pol_link
+
+ if extended_mode:
+ if "?" in tw.text:
+ polemics["Q"] = "Q"
+
+ for entity in tw.entity_list:
+ if entity.type == "entity_url":
+ polemics["REF"] = "REF"
+
+ if len(polemics) > 0:
+ return polemics.keys()
+ else:
+ return None
+
def get_options():
parser = OptionParser()
parser.add_option("-f", "--file", dest="filename",
@@ -66,6 +92,9 @@
help="Replace tweet ensemble", metavar="REPLACE", default=False)
parser.add_option("-L", "--list-conf", dest="listconf",
help="list of file to process", metavar="LIST_CONF", default=None)
+ parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
+ help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
+
set_logging_options(parser)
@@ -254,19 +283,13 @@
meta_element = etree.SubElement(element, u'meta')
- polemics_element = etree.Element(u'polemics')
- polemic_added = False
- for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text):
- polemic_added = True
- pol_link = {
- '++' : u'OK',
- '--' : u'KO',
- '??' : u'Q',
- '==' : u'REF'}[m.group(1)]
- etree.SubElement(polemics_element, u'polemic').text = pol_link
- if polemic_added:
+ polemics_list = parse_polemics(tw, options.extended_mode)
+ if polemics_list:
+ polemics_element = etree.Element(u'polemics')
+ for pol in polemics_list:
+ etree.SubElement(polemics_element, u'polemic').text = pol
meta_element.append(polemics_element)
-
+
etree.SubElement(meta_element, u"source", attrib={"url":u"http://dev.twitter.com", "mimetype":u"application/json"}).text = etree.CDATA(unicode(tw.original_json))
output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=True, xml_declaration=True)