web/lib/django/utils/encoding.py
changeset 29 cc9b7e14412b
parent 0 0d40e90630ef
--- a/web/lib/django/utils/encoding.py	Wed May 19 17:43:59 2010 +0200
+++ b/web/lib/django/utils/encoding.py	Tue May 25 02:43:45 2010 +0200
@@ -3,15 +3,10 @@
 import locale
 import datetime
 import codecs
+from decimal import Decimal
 
 from django.utils.functional import Promise
 
-try:
-    from decimal import Decimal
-except ImportError:
-    from django.utils._decimal import Decimal # Python 2.3 fallback
-
-
 class DjangoUnicodeDecodeError(UnicodeDecodeError):
     def __init__(self, obj, *args):
         self.obj = obj
@@ -89,7 +84,16 @@
             # SafeUnicode at the end.
             s = s.decode(encoding, errors)
     except UnicodeDecodeError, e:
-        raise DjangoUnicodeDecodeError(s, *e.args)
+        if not isinstance(s, Exception):
+            raise DjangoUnicodeDecodeError(s, *e.args)
+        else:
+            # If we get to here, the caller has passed in an Exception
+            # subclass populated with non-ASCII bytestring data without a
+            # working unicode method. Try to handle this without raising a
+            # further exception by individually forcing the exception args
+            # to unicode.
+            s = ' '.join([force_unicode(arg, encoding, strings_only,
+                    errors) for arg in s])
     return s
 
 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
@@ -131,12 +135,21 @@
 
     Returns an ASCII string containing the encoded result.
     """
-    # The list of safe characters here is constructed from the printable ASCII
-    # characters that are not explicitly excluded by the list at the end of
-    # section 3.1 of RFC 3987.
+    # The list of safe characters here is constructed from the "reserved" and
+    # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
+    #     reserved    = gen-delims / sub-delims
+    #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
+    #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
+    #                   / "*" / "+" / "," / ";" / "="
+    #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
+    # Of the unreserved characters, urllib.quote already considers all but
+    # the ~ safe.
+    # The % character is also added to the list of safe characters here, as the
+    # end of section 3.1 of RFC 3987 specifically mentions that % must not be
+    # converted.
     if iri is None:
         return iri
-    return urllib.quote(smart_str(iri), safe='/#%[]=:;$&()+,!?*')
+    return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
 
 
 # The encoding of the default system locale but falls back to the