src/cm/utils/string_utils.py
author gibus
Wed, 11 Sep 2013 23:13:01 +0200
changeset 532 0bad3613f59d
parent 236 725653080973
permissions -rw-r--r--
Revert to YUI 3.0.0: with YUI 3.10.3, comment content that includes the words 'paragraph' or 'section' does not show up in Firefox. This is weird and has to be investigated.

import chardet
import re

def to_unicode(input):
    """Decode a byte string to unicode text, guessing its encoding.

    Candidate encodings are tried in order: the encoding reported by
    ``chardet.detect`` (when it reports one), then UTF-8, then Latin-1.
    The first successful decode is returned.

    Any ``input`` that is not a byte string is returned unchanged.

    Raises:
        Exception: if none of the candidate encodings can decode ``input``.
    """
    # ``bytes`` is an alias of ``str`` on Python 2.6+, so this matches the
    # historical ``str`` check there while leaving already-decoded text
    # untouched on both Python 2 (``unicode``) and Python 3 (``str``).
    if not isinstance(input, bytes):
        return input

    encodings = ['utf8', 'latin1']
    # There is nothing to detect in an empty byte string; it decodes to
    # empty text with the first fallback encoding.
    if input:
        doc_enc = chardet.detect(input)['encoding']
        if doc_enc:
            encodings.insert(0, doc_enc)

    for encoding in encodings:
        try:
            # Return straight away: an empty result is a valid decode and
            # must not be mistaken for a failure.
            return input.decode(encoding)
        except (UnicodeDecodeError, LookupError):
            # LookupError covers a detected encoding name for which Python
            # has no codec; fall through to the next candidate.
            continue
    raise Exception('UnicodeDecodeError: could not decode')

# normalize line endings: every '\r\n', lone '\r' or '\n' becomes '\n'
def strip_cr(input):
    """Return ``input`` with all line endings rewritten as ``'\\n'``."""
    # '\r\n' is listed first so a CRLF pair collapses to a single newline
    # instead of being replaced once per character.
    line_ending = re.compile('\r\n|\r|\n')
    return line_ending.sub('\n', input)