blinkster: comparison web/lib/django/utils/text.py

equal deleted inserted replaced

-:b758351d191f
+:cc9b7e14412b
 import re
-from django.conf import settings
 from django.utils.encoding import force_unicode
 from django.utils.functional import allow_lazy
 from django.utils.translation import ugettext_lazy
 from htmlentitydefs import name2codepoint
 pos = len(lines[-1])
 yield word
 return u''.join(_generator())
 wrap = allow_lazy(wrap, unicode)
def truncate_words(s, num, end_text='...'):
    """Truncate a string after a certain number of words.

    ``s`` is coerced with ``force_unicode`` and split on whitespace; ``num``
    is coerced with ``int``. When the text has more than ``num`` words, the
    first ``num`` words are kept and ``end_text`` is appended as a final
    word, unless the last kept word already ends with ``end_text``. The
    words are re-joined with single spaces, so runs of whitespace in the
    input are collapsed even when nothing is cut.

    A ``num`` of zero or less returns an empty string, the same as
    ``truncate_html_words`` does.
    """
    s = force_unicode(s)
    length = int(num)
    if length <= 0:
        # Without this guard words[:0] is empty and words[-1] below raises
        # IndexError; a negative length would slice from the wrong end.
        return u''
    words = s.split()
    if len(words) > length:
        words = words[:length]
        # Avoid doubling the marker when the cut lands on a word that
        # already carries it.
        if not words[-1].endswith(end_text):
            words.append(end_text)
    return u' '.join(words)
 truncate_words = allow_lazy(truncate_words, unicode)
-def truncate_html_words(s, num):
+def truncate_html_words(s, num, end_text='...'):
-"""
+"""Truncates html to a certain number of words (not counting tags and
-Truncates html to a certain number of words (not counting tags and
 comments). Closes opened tags if they were correctly closed in the given
-html.
+html. Takes an optional argument of what should be used to notify that the
-"""
+string has been truncated, defaults to ellipsis (...)."""
 s = force_unicode(s)
 length = int(num)
 if length <= 0:
 return u''
 html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
 # Set up regular expressions
 re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
 re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
 # Count non-HTML words and keep note of open tags
 pos = 0
-ellipsis_pos = 0
+end_text_pos = 0
 words = 0
 open_tags = []
 while words <= length:
 m = re_words.search(s, pos)
 if not m:
 pos = m.end(0)
 if m.group(1):
 # It's an actual non-HTML word
 words += 1
 if words == length:
-ellipsis_pos = pos
+end_text_pos = pos
 continue
 # Check for tag
 tag = re_tag.match(m.group(0))
-if not tag or ellipsis_pos:
+if not tag or end_text_pos:
 # Don't worry about non tags or tags after our truncate point
 continue
 closing_tag, tagname, self_closing = tag.groups()
 tagname = tagname.lower()  # Element names are always case-insensitive
 if self_closing or tagname in html4_singlets:
 # Add it to the start of the open tags list
 open_tags.insert(0, tagname)
 if words <= length:
 # Don't try to close tags if we don't need to truncate
 return s
-out = s[:ellipsis_pos] + ' ...'
+out = s[:end_text_pos]
+if end_text:
+out += ' ' + end_text
 # Close any tags still open
 for tag in open_tags:
 out += '</%s>' % tag
 # Return string
 return out
 return text
 recapitalize = allow_lazy(recapitalize)
 def phone2numeric(phone):
     """Convert a phone number with letters into its numeric equivalent.

     Every ASCII letter, upper or lower case, is replaced by the digit
     listed for it in the table below (abc -> 2 through wxyz -> 9). All
     other characters are returned unchanged.
     """
     # Built once per call rather than once per matched character.
     keypad = {
         'a': '2', 'b': '2', 'c': '2',
         'd': '3', 'e': '3', 'f': '3',
         'g': '4', 'h': '4', 'i': '4',
         'j': '5', 'k': '5', 'l': '5',
         'm': '6', 'n': '6', 'o': '6',
         'p': '7', 'q': '7', 'r': '7', 's': '7',
         't': '8', 'u': '8', 'v': '8',
         'w': '9', 'x': '9', 'y': '9', 'z': '9',
     }
     # An explicit ASCII class is used instead of [A-Z] with re.I: under
     # Unicode matching the case-insensitive form also matches a few
     # non-ASCII letters that have no keypad entry, and a replacement of
     # None makes re.sub drop them from the result.
     letters = re.compile(r'[A-Za-z]')
     return letters.sub(lambda m: keypad[m.group(0).lower()], phone)
 phone2numeric = allow_lazy(phone2numeric)
 # From http://www.xhaus.com/alan/python/httpcomp.html#gzip
 # Used with permission.
 return r"\u%04x" % ord(match.group(1))
 if type(s) == str:
 s = s.decode('utf-8')
 elif type(s) != unicode:
-raise TypeError, s
+raise TypeError(s)
 s = s.replace('\\', '\\\\')
 s = s.replace('\r', '\\r')
 s = s.replace('\n', '\\n')
 s = s.replace('\t', '\\t')
 s = s.replace("'", "\\'")
 javascript_quote = allow_lazy(javascript_quote, unicode)
 # Expression to match some_token and some_token="with spaces" (and similarly
 # for single-quoted strings).
 smart_split_re = re.compile(r"""
-([^\s"]*"(?:[^"\\]*(?:\\.[^"\\]*)*)"\S*|
+((?:
-[^\s']*'(?:[^'\\]*(?:\\.[^'\\]*)*)'\S*|
+[^\s'"]*
-\S+)""", re.VERBOSE)
+(?:
+(?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
+[^\s'"]*
+)+
+) | \S+)
+""", re.VERBOSE)
 def smart_split(text):
 r"""
 Generator that splits a string by spaces, leaving quoted phrases together.
 Supports both single and double quotes, and supports escaping quotes with

changeset 29	cc9b7e14412b
parent 0	0d40e90630ef