script/utils/export_twitter_alchemy.py
changeset 1480 d2d28468b410
parent 1308 ef42d4f12cfc
child 1496 184372ec27e2
equal deleted inserted replaced
1479:2b510a3a0f7d 1480:d2d28468b410
    94     if len(polemics) > 0:
    94     if len(polemics) > 0:
    95         return polemics.keys()
    95         return polemics.keys()
    96     else:
    96     else:
    97         return None
    97         return None
    98 
    98 
       
    99 def parse_polemics_3(tw, extended_mode):
       
   100     """
       
   101     parse polemics in text and return a list of polemic code. None if not polemic found
       
   102     """
       
   103     polemics = {}
       
   104     for m in re.finditer("(\+\+|\?\?|\*\*|\=\=)",tw.text):
       
   105         pol_link = {
       
   106             '++' : u'OK',
       
   107             '??' : u'KO',
       
   108             '**' : u'REF',
       
   109             '==' : u'Q'}[m.group(1)]
       
   110         polemics[pol_link] = pol_link
       
   111 
       
   112     if extended_mode:
       
   113         for entity in tw.entity_list:
       
   114             if entity.type == "entity_url":
       
   115                 polemics["REF"] = "REF"
       
   116 
       
   117     if len(polemics) > 0:
       
   118         return polemics.keys()
       
   119     else:
       
   120         return None
       
   121 
       
   122 
    99 protocol_version_map = {
   123 protocol_version_map = {
   100     "1" : parse_polemics_1,
   124     "1" : parse_polemics_1,
   101     "2" : parse_polemics_2
   125     "2" : parse_polemics_2,
       
   126     "3" : parse_polemics_3
   102 }
   127 }
   103 
   128 
   104 def get_options():
   129 def get_options():
   105 
   130 
   106     parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC")
   131     parser = argparse.ArgumentParser(description="All date should be given using iso8601 format. If no timezone is used, the date is considered as UTC")