src/cm/utils/string_utils.py
changeset 175 4f072edc51a1
parent 149 0f2c5744b39b
child 236 725653080973
equal deleted inserted replaced
174:805b9195c511 175:4f072edc51a1
     1 import chardet
     1 import chardet
       
     2 import re
     2 
     3 
     3 def to_unicode(input):
     4 def to_unicode(input):
     4     if type(input) == str:
     5     if type(input) == str:
     5         res = None
     6         res = None
     6         for encoding in [chardet.detect(input)['encoding'], 'utf8', 'latin1']:
     7         for encoding in [chardet.detect(input)['encoding'], 'utf8', 'latin1']:
    11                 pass
    12                 pass
    12         if not res:
    13         if not res:
    13             raise Exception('UnicodeDecodeError: could not decode')
    14             raise Exception('UnicodeDecodeError: could not decode')
    14         return res
    15         return res
    15     return input
    16     return input
       
    17 
       
    18 # strip carriage returns
       
    19 def strip_cr(input):
       
    20     return re.sub('\r\n|\r|\n', '\n', input)
       
    21