script/utils/export_twitter_alchemy.py
changeset 1295 03d2aa7b4967
parent 1153 02722ce55cf8
child 1308 ef42d4f12cfc
--- a/script/utils/export_twitter_alchemy.py	Tue Nov 17 12:15:00 2015 +0100
+++ b/script/utils/export_twitter_alchemy.py	Wed Nov 18 15:39:05 2015 +0100
@@ -44,7 +44,7 @@
         get_logger().debug("re_fn : " + repr(expr) + "~" + repr(item)) #@UndefinedVariable
     return res is not None
 
-def parse_polemics(tw, extended_mode):
+def parse_polemics_1(tw, extended_mode):
     """
     parse polemics in text and return a list of polemic code. None if not polemic found
     """
@@ -70,6 +70,37 @@
     else:
         return None
 
+def parse_polemics_2(tw, extended_mode):
+    """
+    Return the list of polemic codes found in tw.text, or None if none is found.
+
+    Markers: '++' -> OK, '!!' -> KO, '??' -> Q, '==' -> REF. When extended_mode
+    is true, any '?' in the text also gives Q and any entity of type
+    "entity_url" in tw.entity_list also gives REF.
+    """
+    # Built once per call rather than once per regex match.
+    marker_codes = {'++' : u'OK', '!!' : u'KO', '??' : u'Q', '==' : u'REF'}
+
+    # A dict is used as a set so that each code is reported only once.
+    polemics = {}
+    for m in re.finditer(r"(\+\+|\!\!|\?\?|\=\=)", tw.text):
+        code = marker_codes[m.group(1)]
+        polemics[code] = code
+
+    if extended_mode:
+        if "?" in tw.text:
+            polemics["Q"] = "Q"
+        if any(entity.type == "entity_url" for entity in tw.entity_list):
+            polemics["REF"] = "REF"
+
+    # list() keeps the documented list return type on Python 3 as well.
+    return list(polemics) if polemics else None
+
+# Maps the -a/--annotation-protocol option value to its polemics parser.
+protocol_version_map = dict([
+    ("1", parse_polemics_1), ("2", parse_polemics_2),
+])
+
 def get_options():
 
     parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC")
@@ -78,6 +109,9 @@
                       help="write export to file", metavar="FILE", default="project.ldt")
     parser.add_argument("-d", "--database", dest="database",
                       help="Input database", metavar="DATABASE")
+    parser.add_argument("-a", "--annotation-protocol", dest="protocol_version",
+                      help="annotation protocol version", metavar="PROTOCOL_VERSION",
+                      default="2")
     parser.add_argument("-s", "--start-date", dest="start_date",
                       help="start date", metavar="START_DATE", default=None)
     parser.add_argument("-e", "--end-date", dest="end_date",
@@ -457,6 +491,7 @@
 
                     meta_element = etree.SubElement(element, u'meta')
 
+                    parse_polemics = protocol_version_map.get(options.protocol_version, parse_polemics_2)
                     polemics_list = parse_polemics(tw, options.extended_mode)
                     if polemics_list:
                         polemics_element = etree.Element(u'polemics')