python/lib/Lib/site-packages/django/utils/encoding.py - platform/tools/idea - Git at Google

 import types
 import urllib
 import locale
 import datetime
 import codecs
 from decimal import Decimal

 from django.utils.functional import Promise

 class DjangoUnicodeDecodeError(UnicodeDecodeError):
     """
     A UnicodeDecodeError that also keeps the object whose decoding failed,
     so that the error message can show it.
     """
     def __init__(self, obj, *args):
         # The remaining positional arguments are handed unchanged to the
         # UnicodeDecodeError initializer.
         UnicodeDecodeError.__init__(self, *args)
         self.obj = obj

     def __str__(self):
         culprit = self.obj
         base_message = UnicodeDecodeError.__str__(self)
         return '%s. You passed in %r (%s)' % (base_message, culprit,
                 type(culprit))

 class StrAndUnicode(object):
     """
     Mix-in that derives __str__ from __unicode__.

     The text returned by __unicode__ is encoded as UTF-8 to produce the
     bytestring form.
     """
     def __str__(self):
         text = self.__unicode__()
         return text.encode('utf-8')

 def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
     """
     Converts 's' to a unicode object, decoding bytestrings with the
     'encoding' codec. Promise instances are handed back untouched.

     When strings_only is True, (some) non-string-like objects are not
     converted.
     """
     if not isinstance(s, Promise):
         return force_unicode(s, encoding, strings_only, errors)
     # A Promise (e.g. the result of a gettext_lazy() call) is kept lazy.
     return s

 def is_protected_type(obj):
     """Determine if the object instance is of a protected type.

     Objects of protected types are preserved as-is when passed to
     force_unicode(strings_only=True).
     """
     return isinstance(obj, (
         types.NoneType,
         int, long,
         datetime.datetime, datetime.date, datetime.time,
         float, Decimal)
     )

 def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
     """
     Similar to smart_unicode, except that lazy instances are resolved to
     strings, rather than kept as lazy objects.

     If strings_only is True, don't convert (some) non-string-like objects;
     the exempt types are those accepted by is_protected_type().

     'encoding' and 'errors' are passed to the decoding calls used for
     bytestrings and for the str() form of other objects.

     Raises DjangoUnicodeDecodeError when decoding fails for an input that
     is not an Exception instance. A UnicodeEncodeError raised by str(s) is
     re-raised unchanged unless 's' is an Exception instance.
     """
     # Handle the common case first, saves 30-40% in performance when s
     # is an instance of unicode. This function gets called often in that
     # setting.
     if isinstance(s, unicode):
         return s
     if strings_only and is_protected_type(s):
         return s
     try:
         if not isinstance(s, basestring,):
             # Not a string at all: prefer the object's own __unicode__,
             # otherwise decode its str() form.
             if hasattr(s, '__unicode__'):
                 s = unicode(s)
             else:
                 try:
                     s = unicode(str(s), encoding, errors)
                 except UnicodeEncodeError:
                     if not isinstance(s, Exception):
                         raise
                     # If we get to here, the caller has passed in an Exception
                     # subclass populated with non-ASCII data without special
                     # handling to display as a string. We need to handle this
                     # without raising a further exception. We do an
                     # approximation to what the Exception's standard str()
                     # output should be.
                     s = ' '.join([force_unicode(arg, encoding, strings_only,
                             errors) for arg in s])
         elif not isinstance(s, unicode):
             # Note: We use .decode() here, instead of unicode(s, encoding,
             # errors), so that if s is a SafeString, it ends up being a
             # SafeUnicode at the end.
             s = s.decode(encoding, errors)
     except UnicodeDecodeError, e:
         if not isinstance(s, Exception):
             # Re-raise with the offending object attached so the message
             # shows what was passed in.
             raise DjangoUnicodeDecodeError(s, *e.args)
         else:
             # If we get to here, the caller has passed in an Exception
             # subclass populated with non-ASCII bytestring data without a
             # working unicode method. Try to handle this without raising a
             # further exception by individually forcing the exception args
             # to unicode.
             s = ' '.join([force_unicode(arg, encoding, strings_only,
                     errors) for arg in s])
     return s

 def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
     """
     Returns a bytestring version of 's', encoded as specified in 'encoding'.

     If strings_only is True, don't convert (some) non-string-like objects.
     """
     if strings_only and isinstance(s, (types.NoneType, int)):
         return s
     if isinstance(s, Promise):
         return unicode(s).encode(encoding, errors)
     elif not isinstance(s, basestring):
         try:
             return str(s)
         except UnicodeEncodeError:
             if isinstance(s, Exception):
                 # An Exception subclass containing non-ASCII data that doesn't
                 # know how to print itself properly. We shouldn't raise a
                 # further exception.
                 return ' '.join([smart_str(arg, encoding, strings_only,
                         errors) for arg in s])
             return unicode(s).encode(encoding, errors)
     elif isinstance(s, unicode):
         return s.encode(encoding, errors)
     elif s and encoding != 'utf-8':
         return s.decode('utf-8', errors).encode(encoding, errors)
     else:
         return s

 def iri_to_uri(iri):
     """
     Turns a portion of an Internationalized Resource Identifier (IRI) into
     a URI portion that can be placed in a URL.

     Follows the algorithm of section 3.1 of RFC 3987, simplified a little
     because the input is assumed to be either UTF-8 or unicode already.

     Returns an ASCII string holding the encoded result. None is handed back
     unchanged.
     """
     # The safe characters below are the "reserved" and "unreserved" sets of
     # RFC 3986, sections 2.2 and 2.3:
     #     reserved    = gen-delims / sub-delims
     #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
     #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
     #                   / "*" / "+" / "," / ";" / "="
     #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
     # urllib.quote already treats every unreserved character except ~ as
     # safe, so only ~ has to be listed from that set.
     # % is listed as well: the end of section 3.1 of RFC 3987 specifically
     # mentions that % must not be converted.
     if iri is not None:
         return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")
     return iri


 # The encoding of the default system locale. Falls back to 'ascii' when the
 # locale's encoding could not be determined or is not supported by Python.
 # See tickets #10335 and #5846.
 try:
     DEFAULT_LOCALE_ENCODING = locale.getdefaultlocale()[1] or 'ascii'
     # Raises if Python has no codec registered under that name.
     codecs.lookup(DEFAULT_LOCALE_ENCODING)
 except Exception:
     # Deliberately broad, since any failure while probing the locale means
     # "use the fallback". Unlike a bare 'except:', this no longer swallows
     # KeyboardInterrupt or SystemExit where those derive from BaseException.
     DEFAULT_LOCALE_ENCODING = 'ascii'
	import types
	import urllib
	import locale
	import datetime
	import codecs
	from decimal import Decimal

	from django.utils.functional import Promise

	class DjangoUnicodeDecodeError(UnicodeDecodeError):
	    """
	    A UnicodeDecodeError that also records the object that could not be
	    decoded and includes it in the error message.
	    """
	    def __init__(self, obj, *args):
	        self.obj = obj
	        # The remaining arguments go unchanged to the base initializer.
	        UnicodeDecodeError.__init__(self, *args)

	    def __str__(self):
	        original = UnicodeDecodeError.__str__(self)
	        return '%s. You passed in %r (%s)' % (original, self.obj,
	                type(self.obj))

	class StrAndUnicode(object):
	    """
	    A class whose __str__ returns its __unicode__ as a UTF-8 bytestring.

	    Useful as a mix-in.
	    """
	    def __str__(self):
	        return self.__unicode__().encode('utf-8')

	def smart_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
	    """
	    Returns a unicode object representing 's'. Treats bytestrings using the
	    'encoding' codec. Promise instances are returned as they are.

	    If strings_only is True, don't convert (some) non-string-like objects.
	    """
	    if isinstance(s, Promise):
	        # The input is the result of a gettext_lazy() call.
	        return s
	    return force_unicode(s, encoding, strings_only, errors)

	def is_protected_type(obj):
	"""Determine if the object instance is of a protected type.

	Objects of protected types are preserved as-is when passed to
	force_unicode(strings_only=True).
	"""
	return isinstance(obj, (
	types.NoneType,
	int, long,
	datetime.datetime, datetime.date, datetime.time,
	float, Decimal)
	)

	def force_unicode(s, encoding='utf-8', strings_only=False, errors='strict'):
	"""
	Similar to smart_unicode, except that lazy instances are resolved to
	strings, rather than kept as lazy objects.

	If strings_only is True, don't convert (some) non-string-like objects.
	"""
	# Handle the common case first, saves 30-40% in performance when s
	# is an instance of unicode. This function gets called often in that
	# setting.
	if isinstance(s, unicode):
	return s
	if strings_only and is_protected_type(s):
	return s
	try:
	if not isinstance(s, basestring,):
	if hasattr(s, '__unicode__'):
	s = unicode(s)
	else:
	try:
	s = unicode(str(s), encoding, errors)
	except UnicodeEncodeError:
	if not isinstance(s, Exception):
	raise
	# If we get to here, the caller has passed in an Exception
	# subclass populated with non-ASCII data without special
	# handling to display as a string. We need to handle this
	# without raising a further exception. We do an
	# approximation to what the Exception's standard str()
	# output should be.
	s = ' '.join([force_unicode(arg, encoding, strings_only,
	errors) for arg in s])
	elif not isinstance(s, unicode):
	# Note: We use .decode() here, instead of unicode(s, encoding,
	# errors), so that if s is a SafeString, it ends up being a
	# SafeUnicode at the end.
	s = s.decode(encoding, errors)
	except UnicodeDecodeError, e:
	if not isinstance(s, Exception):
	raise DjangoUnicodeDecodeError(s, *e.args)
	else:
	# If we get to here, the caller has passed in an Exception
	# subclass populated with non-ASCII bytestring data without a
	# working unicode method. Try to handle this without raising a
	# further exception by individually forcing the exception args
	# to unicode.
	s = ' '.join([force_unicode(arg, encoding, strings_only,
	errors) for arg in s])
	return s

	def smart_str(s, encoding='utf-8', strings_only=False, errors='strict'):
	"""
	Returns a bytestring version of 's', encoded as specified in 'encoding'.

	If strings_only is True, don't convert (some) non-string-like objects.
	"""
	if strings_only and isinstance(s, (types.NoneType, int)):
	return s
	if isinstance(s, Promise):
	return unicode(s).encode(encoding, errors)
	elif not isinstance(s, basestring):
	try:
	return str(s)
	except UnicodeEncodeError:
	if isinstance(s, Exception):
	# An Exception subclass containing non-ASCII data that doesn't
	# know how to print itself properly. We shouldn't raise a
	# further exception.
	return ' '.join([smart_str(arg, encoding, strings_only,
	errors) for arg in s])
	return unicode(s).encode(encoding, errors)
	elif isinstance(s, unicode):
	return s.encode(encoding, errors)
	elif s and encoding != 'utf-8':
	return s.decode('utf-8', errors).encode(encoding, errors)
	else:
	return s

	def iri_to_uri(iri):
	    """
	    Convert an Internationalized Resource Identifier (IRI) portion to a URI
	    portion that is suitable for inclusion in a URL.

	    This is the algorithm from section 3.1 of RFC 3987. However, since we are
	    assuming input is either UTF-8 or unicode already, we can simplify things a
	    little from the full method.

	    Returns an ASCII string containing the encoded result. None is returned
	    unchanged.
	    """
	    # The list of safe characters here is constructed from the "reserved" and
	    # "unreserved" characters specified in sections 2.2 and 2.3 of RFC 3986:
	    #     reserved    = gen-delims / sub-delims
	    #     gen-delims  = ":" / "/" / "?" / "#" / "[" / "]" / "@"
	    #     sub-delims  = "!" / "$" / "&" / "'" / "(" / ")"
	    #                   / "*" / "+" / "," / ";" / "="
	    #     unreserved  = ALPHA / DIGIT / "-" / "." / "_" / "~"
	    # Of the unreserved characters, urllib.quote already considers all but
	    # the ~ safe.
	    # The % character is also added to the list of safe characters here, as the
	    # end of section 3.1 of RFC 3987 specifically mentions that % must not be
	    # converted.
	    if iri is None:
	        return iri
	    return urllib.quote(smart_str(iri), safe="/#%[]=:;$&()+,!?*@'~")


	# The encoding of the default system locale. Falls back to 'ascii' when the
	# locale's encoding could not be determined or is not supported by Python.
	# See tickets #10335 and #5846.
	try:
	    DEFAULT_LOCALE_ENCODING = locale.getdefaultlocale()[1] or 'ascii'
	    # Raises if Python has no codec registered under that name.
	    codecs.lookup(DEFAULT_LOCALE_ENCODING)
	except Exception:
	    # Deliberately broad, since any failure while probing the locale means
	    # "use the fallback". Unlike a bare 'except:', this no longer swallows
	    # KeyboardInterrupt or SystemExit where those derive from BaseException.
	    DEFAULT_LOCALE_ENCODING = 'ascii'