equal
deleted
inserted
replaced
1 import chardet |
1 import chardet |
|
2 import re |
2 |
3 |
3 def to_unicode(input): |
4 def to_unicode(input): |
4 if type(input) == str: |
5 if type(input) == str: |
5 res = None |
6 res = None |
6 for encoding in [chardet.detect(input)['encoding'], 'utf8', 'latin1']: |
7 for encoding in [chardet.detect(input)['encoding'], 'utf8', 'latin1']: |
11 pass |
12 pass |
12 if not res: |
13 if not res: |
13 raise Exception('UnicodeDecodeError: could not decode') |
14 raise Exception('UnicodeDecodeError: could not decode') |
14 return res |
15 return res |
15 return input |
16 return input |
|
17 |
|
18 # strip carriage returns |
|
19 def strip_cr(input): |
|
20 return re.sub('\r\n|\r|\n', '\n', input) |
|
21 |