| author | Kirill Miazine <km@krot.org> |
| Mon, 15 Feb 2010 09:46:50 +0100 | |
| changeset 187 | 947375d8597b |
| parent 175 | 4f072edc51a1 |
| child 236 | 725653080973 |
| permissions | -rw-r--r-- |
import re

try:
    import chardet
except ImportError:
    # Detection is only the first of several attempts; without the package
    # to_unicode() still tries the fixed fallback encodings below.
    chardet = None

# Encodings tried, in order, after whatever chardet suggests.
_FALLBACK_ENCODINGS = ('utf8', 'latin1')

# A CRLF pair or a lone CR; a bare LF is already in the target form.
_CR_NEWLINE_RE = re.compile('\r\n?')


def to_unicode(input):
    """Decode a byte string to text, guessing its encoding.

    Byte strings (``str`` on Python 2, ``bytes`` on Python 3) are decoded
    with the encoding reported by ``chardet.detect`` when one is available,
    then with each of ``utf8`` and ``latin1``; the first successful decode
    is returned.  Any other value (including text that is already decoded)
    is returned unchanged.

    Raises:
        Exception: if no candidate encoding could decode the input.
    """
    if not isinstance(input, bytes):
        return input

    candidates = []
    if chardet is not None:
        detected = chardet.detect(input)['encoding']
        # chardet reports None when it cannot make a guess (e.g. for empty
        # input); passing that on as a codec name would raise TypeError.
        if detected:
            candidates.append(detected)
    candidates.extend(_FALLBACK_ENCODINGS)

    for encoding in candidates:
        try:
            # Return straight away: an empty result is a valid decode and
            # must not be mistaken for "nothing worked".
            return input.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError: the detected name is not a codec Python knows.
            continue

    # latin1 maps every byte value, so this is a safeguard only.
    raise Exception('UnicodeDecodeError: could not decode')


def strip_cr(input):
    """Normalize line endings: replace each CRLF pair or lone CR with LF."""
    return _CR_NEWLINE_RE.sub('\n', input)