42 res = reg.search(item) |
42 res = reg.search(item) |
43 if res: |
43 if res: |
44 get_logger().debug("re_fn : " + repr(expr) + "~" + repr(item)) #@UndefinedVariable |
44 get_logger().debug("re_fn : " + repr(expr) + "~" + repr(item)) #@UndefinedVariable |
45 return res is not None |
45 return res is not None |
46 |
46 |
47 def parse_polemics(tw, extended_mode): |
47 def parse_polemics_1(tw, extended_mode): |
48 """ |
48 """ |
49 parse polemics in text and return a list of polemic code. None if not polemic found |
49 parse polemics in text and return a list of polemic code. None if not polemic found |
50 """ |
50 """ |
51 polemics = {} |
51 polemics = {} |
52 for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text): |
52 for m in re.finditer("(\+\+|\-\-|\?\?|\=\=)",tw.text): |
68 if len(polemics) > 0: |
68 if len(polemics) > 0: |
69 return polemics.keys() |
69 return polemics.keys() |
70 else: |
70 else: |
71 return None |
71 return None |
72 |
72 |
|
73 def parse_polemics_2(tw, extended_mode): |
|
74 """ |
|
75 parse polemics in text and return a list of polemic code. None if not polemic found |
|
76 """ |
|
77 polemics = {} |
|
78 for m in re.finditer("(\+\+|\!\!|\?\?|\=\=)",tw.text): |
|
79 pol_link = { |
|
80 '++' : u'OK', |
|
81 '!!' : u'KO', |
|
82 '??' : u'Q', |
|
83 '==' : u'REF'}[m.group(1)] |
|
84 polemics[pol_link] = pol_link |
|
85 |
|
86 if extended_mode: |
|
87 if "?" in tw.text: |
|
88 polemics["Q"] = "Q" |
|
89 |
|
90 for entity in tw.entity_list: |
|
91 if entity.type == "entity_url": |
|
92 polemics["REF"] = "REF" |
|
93 |
|
94 if len(polemics) > 0: |
|
95 return polemics.keys() |
|
96 else: |
|
97 return None |
|
98 |
|
99 protocol_version_map = { |
|
100 "1" : parse_polemics_1, |
|
101 "2" : parse_polemics_2 |
|
102 } |
|
103 |
73 def get_options(): |
104 def get_options(): |
74 |
105 |
75 parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC") |
106 parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC") |
76 |
107 |
77 parser.add_argument("-f", "--file", dest="filename", |
108 parser.add_argument("-f", "--file", dest="filename", |
78 help="write export to file", metavar="FILE", default="project.ldt") |
109 help="write export to file", metavar="FILE", default="project.ldt") |
79 parser.add_argument("-d", "--database", dest="database", |
110 parser.add_argument("-d", "--database", dest="database", |
80 help="Input database", metavar="DATABASE") |
111 help="Input database", metavar="DATABASE") |
|
112 parser.add_argument("-a", "--annotation-protocol", dest="protocol_version", |
|
113 help="annotation protocol version", metavar="PROTOCOL_VERSION", |
|
114 default="2") |
81 parser.add_argument("-s", "--start-date", dest="start_date", |
115 parser.add_argument("-s", "--start-date", dest="start_date", |
82 help="start date", metavar="START_DATE", default=None) |
116 help="start date", metavar="START_DATE", default=None) |
83 parser.add_argument("-e", "--end-date", dest="end_date", |
117 parser.add_argument("-e", "--end-date", dest="end_date", |
84 help="end date", metavar="END_DATE", default=None) |
118 help="end date", metavar="END_DATE", default=None) |
85 parser.add_argument("-I", "--content-file", dest="content_file", |
119 parser.add_argument("-I", "--content-file", dest="content_file", |
455 if entity.type == u'entity_hashtag': |
489 if entity.type == u'entity_hashtag': |
456 etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
490 etree.SubElement(tags_node,u"tag").text = entity.hashtag.text |
457 |
491 |
458 meta_element = etree.SubElement(element, u'meta') |
492 meta_element = etree.SubElement(element, u'meta') |
459 |
493 |
|
494 parse_polemics = protocol_version_map.get(options.protocol_version, parse_polemics_2) |
460 polemics_list = parse_polemics(tw, options.extended_mode) |
495 polemics_list = parse_polemics(tw, options.extended_mode) |
461 if polemics_list: |
496 if polemics_list: |
462 polemics_element = etree.Element(u'polemics') |
497 polemics_element = etree.Element(u'polemics') |
463 for pol in polemics_list: |
498 for pol in polemics_list: |
464 etree.SubElement(polemics_element, u'polemic').text = pol |
499 etree.SubElement(polemics_element, u'polemic').text = pol |