equal
deleted
inserted
replaced
2 import re |
2 import re |
3 |
3 |
4 def to_unicode(input): |
4 def to_unicode(input): |
5 if type(input) == str: |
5 if type(input) == str: |
6 res = None |
6 res = None |
7 for encoding in [chardet.detect(input)['encoding'], 'utf8', 'latin1']: |
7 encodings = ['utf8', 'latin1'] |
|
8 doc_enc = chardet.detect(input)['encoding'] |
|
9 if doc_enc: |
|
10 encodings = [doc_enc,] + encodings |
|
11 for encoding in encodings: |
8 try: |
12 try: |
9 res = unicode(input, encoding) |
13 res = unicode(input, encoding) |
10 break; |
14 break; |
11 except UnicodeDecodeError: |
15 except UnicodeDecodeError: |
12 pass |
16 pass |