src/cm/utils/string.py
changeset 155 f436e2fef076
parent 154 8cb89c153140
parent 153 e4746a10edf5
child 156 6d447220fd1e
equal deleted inserted replaced
154:8cb89c153140 155:f436e2fef076
     1 import chardet
       
     2 
       
     3 def to_unicode(input):
       
     4     if type(input) == str:
       
     5         res = None
       
     6         for encoding in [chardet.detect(input)['encoding'], 'utf8', 'latin1']:
       
     7             try:
       
     8                 res = unicode(input, encoding)
       
     9                 break;
       
    10             except UnicodeDecodeError:
       
    11                 pass
       
    12         if not res:
       
    13             raise Exception('UnicodeDecodeError: could not decode')
       
    14         return res
       
    15     return input