web/lib/django/utils/encoding.py
changeset 29 cc9b7e14412b
parent 0 0d40e90630ef
equal deleted inserted replaced
28:b758351d191f 29:cc9b7e14412b
     1 import types
     1 import types
     2 import urllib
     2 import urllib
     3 import locale
     3 import locale
     4 import datetime
     4 import datetime
     5 import codecs
     5 import codecs
       
     6 from decimal import Decimal
     6 
     7 
     7 from django.utils.functional import Promise
     8 from django.utils.functional import Promise
     8 
       
     9 try:
       
    10     from decimal import Decimal
       
    11 except ImportError:
       
    12     from django.utils._decimal import Decimal # Python 2.3 fallback
       
    13 
       
    14 
     9 
    15 class DjangoUnicodeDecodeError(UnicodeDecodeError):
    10 class DjangoUnicodeDecodeError(UnicodeDecodeError):
    16     def __init__(self, obj, *args):
    11     def __init__(self, obj, *args):
    17         self.obj = obj
    12         self.obj = obj
    18         UnicodeDecodeError.__init__(self, *args)
    13         UnicodeDecodeError.__init__(self, *args)
    87             # Note: We use .decode() here, instead of unicode(s, encoding,
    82             # Note: We use .decode() here, instead of unicode(s, encoding,
    88             # errors), so that if s is a SafeString, it ends up being a
    83             # errors), so that if s is a SafeString, it ends up being a
    89             # SafeUnicode at the end.
    84             # SafeUnicode at the end.
    90             s = s.decode(encoding, errors)
    85             s = s.decode(encoding, errors)
    91     except UnicodeDecodeError, e:
    86     except UnicodeDecodeError, e:
    92         raise DjangoUnicodeDecodeError(s, *e.args)
    87         if not isinstance(s, Exception):
       
    88             raise DjangoUnicodeDecodeError(s, *e.args)
       
    89         else:
       
    90             # If we get to here, the caller has passed in an Exception
       
    91             # subclass populated with non-ASCII bytestring data without a
       
    92             # working unicode method. Try to handle this without raising a
       
    93             # further exception by individually forcing the exception args
       
    94             # to unicode.
       
    95             s = ' '.join([force_unicode(arg, encoding, strings_only,
       
    96                     errors) for arg in s])
    93     return s
    97     return s
    94 
    98 
    95 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    99 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
    96     """
   100     """
    97     Returns a bytestring version of 's', encoded as specified in 'encoding'.
   101     Returns a bytestring version of 's', encoded as specified in 'encoding'.
   129     assuming input is either UTF-8 or unicode already, we can simplify things a
   133     assuming input is either UTF-8 or unicode already, we can simplify things a
   130     little from the full method.
   134     little from the full method.
   131 
   135 
   132     Returns an ASCII string containing the encoded result.
   136     Returns an ASCII string containing the encoded result.
   133     """
   137     """
   134     # The list of safe characters here is constructed from the printable ASCII
   138     # The list of safe characters here is constructed from the "reserved" and
   135     # characters that are not explicitly excluded by the list at the end of
   139     # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
   136     # section 3.1 of RFC 3987.
   140     #     reserved    = gen-delims / sub-delims
       
   141     #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
       
   142     #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
       
   143     #                   / "*" / "+" / "," / ";" / "="
       
   144     #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
       
   145     # Of the unreserved characters, urllib.quote already considers all but
       
   146     # the ~ safe.
       
   147     # The % character is also added to the list of safe characters here, as the
       
   148     # end of section 3.1 of RFC 3987 specifically mentions that % must not be
       
   149     # converted.
   137     if iri is None:
   150     if iri is None:
   138         return iri
   151         return iri
   139     return urllib.quote(smart_str(iri), safe='/#%[]=:;$&()+,!?*')
   152     return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
   140 
   153 
   141 
   154 
   142 # The encoding of the default system locale but falls back to the
   155 # The encoding of the default system locale but falls back to the
   143 # given fallback encoding if the encoding is unsupported by python or could
   156 # given fallback encoding if the encoding is unsupported by python or could
   144 # not be determined.  See tickets #10335 and #5846
   157 # not be determined.  See tickets #10335 and #5846