--- a/script/utils/export_twitter_alchemy.py Tue Nov 17 12:15:00 2015 +0100
+++ b/script/utils/export_twitter_alchemy.py Wed Nov 18 15:39:05 2015 +0100
@@ -44,7 +44,7 @@
get_logger().debug("re_fn : " + repr(expr) + "~" + repr(item)) #@UndefinedVariable
return res is not None
-def parse_polemics(tw, extended_mode):
+def parse_polemics_1(tw, extended_mode):
"""
parse polemics in text and return a list of polemic code. None if not polemic found
"""
@@ -70,6 +70,37 @@
else:
return None
+def parse_polemics_2(tw, extended_mode):
+    """
+    Parse the polemic markers of a tweet (annotation protocol version 2).
+
+    The text is scanned for the two-character markers ``++``, ``!!``, ``??``
+    and ``==``, which map to the codes OK, KO, Q and REF respectively.
+
+    When extended_mode is true, two extra rules apply: any "?" in the text
+    adds Q, and any entity of tw.entity_list whose type is "entity_url"
+    adds REF.
+
+    tw must expose a ``text`` string and an ``entity_list`` iterable (the
+    latter is only read in extended mode).
+
+    Return the list of distinct codes in order of first detection, or None
+    if no polemic was found.
+    """
+    # Built once per call instead of once per regex match.
+    marker_codes = {
+        '++': u'OK',
+        '!!': u'KO',
+        '??': u'Q',
+        '==': u'REF',
+    }
+    polemics = []
+
+    def add(code):
+        # Keep each code only once, in order of first appearance.
+        if code not in polemics:
+            polemics.append(code)
+
+    # Raw string: "\+" and "\?" are regex escapes, not string escapes.
+    for m in re.finditer(r"(\+\+|!!|\?\?|==)", tw.text):
+        add(marker_codes[m.group(1)])
+
+    if extended_mode:
+        if "?" in tw.text:
+            add("Q")
+        if any(entity.type == "entity_url" for entity in tw.entity_list):
+            add("REF")
+
+    # A real list (not a dict view) so callers get the documented type.
+    return polemics or None
+
+# Polemic parsers keyed by annotation protocol version. The keys are strings
+# so the value of the --annotation-protocol option can be used directly.
+protocol_version_map = {
+    "1": parse_polemics_1,
+    "2": parse_polemics_2,
+}
+
def get_options():
parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC")
@@ -78,6 +109,9 @@
help="write export to file", metavar="FILE", default="project.ldt")
parser.add_argument("-d", "--database", dest="database",
help="Input database", metavar="DATABASE")
+ parser.add_argument("-a", "--annotation-protocol", dest="protocol_version",
+ help="annotation protocol version", metavar="PROTOCOL_VERSION",
+ default="2")
parser.add_argument("-s", "--start-date", dest="start_date",
help="start date", metavar="START_DATE", default=None)
parser.add_argument("-e", "--end-date", dest="end_date",
@@ -457,6 +491,7 @@
meta_element = etree.SubElement(element, u'meta')
+ parse_polemics = protocol_version_map.get(options.protocol_version, parse_polemics_2)
polemics_list = parse_polemics(tw, options.extended_mode)
if polemics_list:
polemics_element = etree.Element(u'polemics')