blinkster: comparison web/lib/django/utils/encoding.py

equal deleted inserted replaced

-:b758351d191f
+:cc9b7e14412b
 import types
 import urllib
 import locale
 import datetime
 import codecs
+from decimal import Decimal
 from django.utils.functional import Promise
-try:
-from decimal import Decimal
-except ImportError:
-from django.utils._decimal import Decimal # Python 2.3 fallback
 class DjangoUnicodeDecodeError(UnicodeDecodeError):
 def __init__(self, obj, *args):
 self.obj = obj
 UnicodeDecodeError.__init__(self, *args)
 # Note: We use .decode() here, instead of unicode(s, encoding,
 # errors), so that if s is a SafeString, it ends up being a
 # SafeUnicode at the end.
 s = s.decode(encoding, errors)
 except UnicodeDecodeError, e:
-raise DjangoUnicodeDecodeError(s, *e.args)
+if not isinstance(s, Exception):
+raise DjangoUnicodeDecodeError(s, *e.args)
+else:
+# If we get to here, the caller has passed in an Exception
+# subclass populated with non-ASCII bytestring data without a
+# working unicode method. Try to handle this without raising a
+# further exception by individually forcing the exception args
+# to unicode.
+s = ' '.join([force_unicode(arg, encoding, strings_only,
+errors) for arg in s])
 return s
 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
 """
 Returns a bytestring version of 's', encoded as specified in 'encoding'.
 assuming input is either UTF-8 or unicode already, we can simplify things a
 little from the full method.
 Returns an ASCII string containing the encoded result.
 """
-# The list of safe characters here is constructed from the printable ASCII
+# The list of safe characters here is constructed from the "reserved" and
-# characters that are not explicitly excluded by the list at the end of
+# "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
-# section 3.1 of RFC 3987.
+#     reserved    = gen-delims / sub-delims
+#     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+#     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
+#                   / "*" / "+" / "," / ";" / "="
+#     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
+# Of the unreserved characters, urllib.quote already considers all but
+# the ~ safe.
+# The % character is also added to the list of safe characters here, as the
+# end of section 3.1 of RFC 3987 specifically mentions that % must not be
+# converted.
 if iri is None:
 return iri
-return urllib.quote(smart_str(iri), safe='/#%[]=:;$&()+,!?*')
+return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
 # The encoding of the default system locale, falling back to the
 # given fallback encoding if the locale's encoding is unsupported by Python
 # or could not be determined.  See tickets #10335 and #5846

changeset 29	cc9b7e14412b
parent 0	0d40e90630ef