web/lib/django/utils/encoding.py
changeset 0 0d40e90630ef
child 29 cc9b7e14412b
equal deleted inserted replaced
-1:000000000000 0:0d40e90630ef
       
     1 import types
       
     2 import urllib
       
     3 import locale
       
     4 import datetime
       
     5 import codecs
       
     6 
       
     7 from django.utils.functional import Promise
       
     8 
       
     9 try:
       
    10     from decimal import Decimal
       
    11 except ImportError:
       
    12     from django.utils._decimal import Decimal # Python 2.3 fallback
       
    13 
       
    14 
       
    15 class DjangoUnicodeDecodeError(UnicodeDecodeError):
       
    16     def __init__(self, obj, *args):
       
    17         self.obj = obj
       
    18         UnicodeDecodeError.__init__(self, *args)
       
    19 
       
    20     def __str__(self):
       
    21         original = UnicodeDecodeError.__str__(self)
       
    22         return '%s. You passed in %r (%s)' % (original, self.obj,
       
    23                 type(self.obj))
       
    24 
       
    25 class StrAndUnicode(object):
       
    26     """
       
    27     A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.
       
    28 
       
    29     Useful as a mix-in.
       
    30     """
       
    31     def __str__(self):
       
    32         return self.__unicode__().encode('utf-8')
       
    33 
       
    34 def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
       
    35     """
       
    36     Returns a unicode object representing 's'. Treats bytestrings using the
       
    37     'encoding' codec.
       
    38 
       
    39     If strings_only is True, don't convert (some) non-string-like objects.
       
    40     """
       
    41     if isinstance(s, Promise):
       
    42         # The input is the result of a gettext_lazy() call.
       
    43         return s
       
    44     return force_unicode(s, encoding, strings_only, errors)
       
    45 
       
    46 def is_protected_type(obj):
       
    47     """Determine if the object instance is of a protected type.
       
    48 
       
    49     Objects of protected types are preserved as-is when passed to
       
    50     force_unicode(strings_only=True).
       
    51     """
       
    52     return isinstance(obj, (
       
    53         types.NoneType,
       
    54         int, long,
       
    55         datetime.datetime, datetime.date, datetime.time,
       
    56         float, Decimal)
       
    57     )
       
    58 
       
    59 def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
       
    60     """
       
    61     Similar to smart_unicode, except that lazy instances are resolved to
       
    62     strings, rather than kept as lazy objects.
       
    63 
       
    64     If strings_only is True, don't convert (some) non-string-like objects.
       
    65     """
       
    66     if strings_only and is_protected_type(s):
       
    67         return s
       
    68     try:
       
    69         if not isinstance(s, basestring,):
       
    70             if hasattr(s, '__unicode__'):
       
    71                 s = unicode(s)
       
    72             else:
       
    73                 try:
       
    74                     s = unicode(str(s), encoding, errors)
       
    75                 except UnicodeEncodeError:
       
    76                     if not isinstance(s, Exception):
       
    77                         raise
       
    78                     # If we get to here, the caller has passed in an Exception
       
    79                     # subclass populated with non-ASCII data without special
       
    80                     # handling to display as a string. We need to handle this
       
    81                     # without raising a further exception. We do an
       
    82                     # approximation to what the Exception's standard str()
       
    83                     # output should be.
       
    84                     s = ' '.join([force_unicode(arg, encoding, strings_only,
       
    85                             errors) for arg in s])
       
    86         elif not isinstance(s, unicode):
       
    87             # Note: We use .decode() here, instead of unicode(s, encoding,
       
    88             # errors), so that if s is a SafeString, it ends up being a
       
    89             # SafeUnicode at the end.
       
    90             s = s.decode(encoding, errors)
       
    91     except UnicodeDecodeError, e:
       
    92         raise DjangoUnicodeDecodeError(s, *e.args)
       
    93     return s
       
    94 
       
    95 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
       
    96     """
       
    97     Returns a bytestring version of 's', encoded as specified in 'encoding'.
       
    98 
       
    99     If strings_only is True, don't convert (some) non-string-like objects.
       
   100     """
       
   101     if strings_only and isinstance(s, (types.NoneType, int)):
       
   102         return s
       
   103     if isinstance(s, Promise):
       
   104         return unicode(s).encode(encoding, errors)
       
   105     elif not isinstance(s, basestring):
       
   106         try:
       
   107             return str(s)
       
   108         except UnicodeEncodeError:
       
   109             if isinstance(s, Exception):
       
   110                 # An Exception subclass containing non-ASCII data that doesn't
       
   111                 # know how to print itself properly. We shouldn't raise a
       
   112                 # further exception.
       
   113                 return ' '.join([smart_str(arg, encoding, strings_only,
       
   114                         errors) for arg in s])
       
   115             return unicode(s).encode(encoding, errors)
       
   116     elif isinstance(s, unicode):
       
   117         return s.encode(encoding, errors)
       
   118     elif s and encoding != 'utf-8':
       
   119         return s.decode('utf-8', errors).encode(encoding, errors)
       
   120     else:
       
   121         return s
       
   122 
       
   123 def iri_to_uri(iri):
       
   124     """
       
   125     Convert an Internationalized Resource Identifier (IRI) portion to a URI
       
   126     portion that is suitable for inclusion in a URL.
       
   127 
       
   128     This is the algorithm from section 3.1 of RFC 3987.  However, since we are
       
   129     assuming input is either UTF-8 or unicode already, we can simplify things a
       
   130     little from the full method.
       
   131 
       
   132     Returns an ASCII string containing the encoded result.
       
   133     """
       
   134     # The list of safe characters here is constructed from the printable ASCII
       
   135     # characters that are not explicitly excluded by the list at the end of
       
   136     # section 3.1 of RFC 3987.
       
   137     if iri is None:
       
   138         return iri
       
   139     return urllib.quote(smart_str(iri), safe='/#%[]=:;$&()+,!?*')
       
   140 
       
   141 
       
   142 # The encoding of the default system locale but falls back to the
       
   143 # given fallback encoding if the encoding is unsupported by python or could
       
   144 # not be determined.  See tickets #10335 and #5846
       
   145 try:
       
   146     DEFAULT_LOCALE_ENCODING = locale.getdefaultlocale()[1] or 'ascii'
       
   147     codecs.lookup(DEFAULT_LOCALE_ENCODING)
       
   148 except:
       
   149     DEFAULT_LOCALE_ENCODING = 'ascii'