web/lib/django/utils/text.py
changeset 29 cc9b7e14412b
parent 0 0d40e90630ef
equal deleted inserted replaced
28:b758351d191f 29:cc9b7e14412b
     1 import re
     1 import re
     2 from django.conf import settings
       
     3 from django.utils.encoding import force_unicode
     2 from django.utils.encoding import force_unicode
     4 from django.utils.functional import allow_lazy
     3 from django.utils.functional import allow_lazy
     5 from django.utils.translation import ugettext_lazy
     4 from django.utils.translation import ugettext_lazy
     6 from htmlentitydefs import name2codepoint
     5 from htmlentitydefs import name2codepoint
     7 
     6 
    35                     pos = len(lines[-1])
    34                     pos = len(lines[-1])
    36             yield word
    35             yield word
    37     return u''.join(_generator())
    36     return u''.join(_generator())
    38 wrap = allow_lazy(wrap, unicode)
    37 wrap = allow_lazy(wrap, unicode)
    39 
    38 
    40 def truncate_words(s, num):
    39 def truncate_words(s, num, end_text='...'):
    41     "Truncates a string after a certain number of words."
    40     """Truncates a string after a certain number of words. Takes an optional
       
    41     argument of what should be used to notify that the string has been
       
    42     truncated, defaults to ellipsis (...)"""
    42     s = force_unicode(s)
    43     s = force_unicode(s)
    43     length = int(num)
    44     length = int(num)
    44     words = s.split()
    45     words = s.split()
    45     if len(words) > length:
    46     if len(words) > length:
    46         words = words[:length]
    47         words = words[:length]
    47         if not words[-1].endswith('...'):
    48         if not words[-1].endswith(end_text):
    48             words.append('...')
    49             words.append(end_text)
    49     return u' '.join(words)
    50     return u' '.join(words)
    50 truncate_words = allow_lazy(truncate_words, unicode)
    51 truncate_words = allow_lazy(truncate_words, unicode)
    51 
    52 
    52 def truncate_html_words(s, num):
    53 def truncate_html_words(s, num, end_text='...'):
    53     """
    54     """Truncates html to a certain number of words (not counting tags and
    54     Truncates html to a certain number of words (not counting tags and
       
    55     comments). Closes opened tags if they were correctly closed in the given
    55     comments). Closes opened tags if they were correctly closed in the given
    56     html.
    56     html. Takes an optional argument of what should be used to notify that the
    57     """
    57     string has been truncated, defaults to ellipsis (...)."""
    58     s = force_unicode(s)
    58     s = force_unicode(s)
    59     length = int(num)
    59     length = int(num)
    60     if length <= 0:
    60     if length <= 0:
    61         return u''
    61         return u''
    62     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    62     html4_singlets = ('br', 'col', 'link', 'base', 'img', 'param', 'area', 'hr', 'input')
    63     # Set up regular expressions
    63     # Set up regular expressions
    64     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    64     re_words = re.compile(r'&.*?;|<.*?>|(\w[\w-]*)', re.U)
    65     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    65     re_tag = re.compile(r'<(/)?([^ ]+?)(?: (/)| .*?)?>')
    66     # Count non-HTML words and keep note of open tags
    66     # Count non-HTML words and keep note of open tags
    67     pos = 0
    67     pos = 0
    68     ellipsis_pos = 0
    68     end_text_pos = 0
    69     words = 0
    69     words = 0
    70     open_tags = []
    70     open_tags = []
    71     while words <= length:
    71     while words <= length:
    72         m = re_words.search(s, pos)
    72         m = re_words.search(s, pos)
    73         if not m:
    73         if not m:
    76         pos = m.end(0)
    76         pos = m.end(0)
    77         if m.group(1):
    77         if m.group(1):
    78             # It's an actual non-HTML word
    78             # It's an actual non-HTML word
    79             words += 1
    79             words += 1
    80             if words == length:
    80             if words == length:
    81                 ellipsis_pos = pos
    81                 end_text_pos = pos
    82             continue
    82             continue
    83         # Check for tag
    83         # Check for tag
    84         tag = re_tag.match(m.group(0))
    84         tag = re_tag.match(m.group(0))
    85         if not tag or ellipsis_pos:
    85         if not tag or end_text_pos:
    86             # Don't worry about non tags or tags after our truncate point
    86             # Don't worry about non tags or tags after our truncate point
    87             continue
    87             continue
    88         closing_tag, tagname, self_closing = tag.groups()
    88         closing_tag, tagname, self_closing = tag.groups()
    89         tagname = tagname.lower()  # Element names are always case-insensitive
    89         tagname = tagname.lower()  # Element names are always case-insensitive
    90         if self_closing or tagname in html4_singlets:
    90         if self_closing or tagname in html4_singlets:
   102             # Add it to the start of the open tags list
   102             # Add it to the start of the open tags list
   103             open_tags.insert(0, tagname)
   103             open_tags.insert(0, tagname)
   104     if words <= length:
   104     if words <= length:
   105         # Don't try to close tags if we don't need to truncate
   105         # Don't try to close tags if we don't need to truncate
   106         return s
   106         return s
   107     out = s[:ellipsis_pos] + ' ...'
   107     out = s[:end_text_pos]
       
   108     if end_text:
       
   109         out += ' ' + end_text
   108     # Close any tags still open
   110     # Close any tags still open
   109     for tag in open_tags:
   111     for tag in open_tags:
   110         out += '</%s>' % tag
   112         out += '</%s>' % tag
   111     # Return string
   113     # Return string
   112     return out
   114     return out
   155     return text
   157     return text
   156 recapitalize = allow_lazy(recapitalize)
   158 recapitalize = allow_lazy(recapitalize)
   157 
   159 
   158 def phone2numeric(phone):
   160 def phone2numeric(phone):
   159     "Converts a phone number with letters into its numeric equivalent."
   161     "Converts a phone number with letters into its numeric equivalent."
   160     letters = re.compile(r'[A-PR-Y]', re.I)
   162     letters = re.compile(r'[A-Z]', re.I)
   161     char2number = lambda m: {'a': '2', 'c': '2', 'b': '2', 'e': '3',
   163     char2number = lambda m: {'a': '2', 'b': '2', 'c': '2', 'd': '3', 'e': '3',
   162          'd': '3', 'g': '4', 'f': '3', 'i': '4', 'h': '4', 'k': '5',
   164          'f': '3', 'g': '4', 'h': '4', 'i': '4', 'j': '5', 'k': '5', 'l': '5',
   163          'j': '5', 'm': '6', 'l': '5', 'o': '6', 'n': '6', 'p': '7',
   165          'm': '6', 'n': '6', 'o': '6', 'p': '7', 'q': '7', 'r': '7', 's': '7',
   164          's': '7', 'r': '7', 'u': '8', 't': '8', 'w': '9', 'v': '8',
   166          't': '8', 'u': '8', 'v': '8', 'w': '9', 'x': '9', 'y': '9', 'z': '9',
   165          'y': '9', 'x': '9'}.get(m.group(0).lower())
   167         }.get(m.group(0).lower())
   166     return letters.sub(char2number, phone)
   168     return letters.sub(char2number, phone)
   167 phone2numeric = allow_lazy(phone2numeric)
   169 phone2numeric = allow_lazy(phone2numeric)
   168 
   170 
   169 # From http://www.xhaus.com/alan/python/httpcomp.html#gzip
   171 # From http://www.xhaus.com/alan/python/httpcomp.html#gzip
   170 # Used with permission.
   172 # Used with permission.
   184         return r"\u%04x" % ord(match.group(1))
   186         return r"\u%04x" % ord(match.group(1))
   185 
   187 
   186     if type(s) == str:
   188     if type(s) == str:
   187         s = s.decode('utf-8')
   189         s = s.decode('utf-8')
   188     elif type(s) != unicode:
   190     elif type(s) != unicode:
   189         raise TypeError, s
   191         raise TypeError(s)
   190     s = s.replace('\\', '\\\\')
   192     s = s.replace('\\', '\\\\')
   191     s = s.replace('\r', '\\r')
   193     s = s.replace('\r', '\\r')
   192     s = s.replace('\n', '\\n')
   194     s = s.replace('\n', '\\n')
   193     s = s.replace('\t', '\\t')
   195     s = s.replace('\t', '\\t')
   194     s = s.replace("'", "\\'")
   196     s = s.replace("'", "\\'")
   198 javascript_quote = allow_lazy(javascript_quote, unicode)
   200 javascript_quote = allow_lazy(javascript_quote, unicode)
   199 
   201 
   200 # Expression to match some_token and some_token="with spaces" (and similarly
   202 # Expression to match some_token and some_token="with spaces" (and similarly
   201 # for single-quoted strings).
   203 # for single-quoted strings).
   202 smart_split_re = re.compile(r"""
   204 smart_split_re = re.compile(r"""
   203     ([^\s"]*"(?:[^"\\]*(?:\\.[^"\\]*)*)"\S*|
   205     ((?:
   204      [^\s']*'(?:[^'\\]*(?:\\.[^'\\]*)*)'\S*|
   206         [^\s'"]*
   205      \S+)""", re.VERBOSE)
   207         (?:
       
   208             (?:"(?:[^"\\]|\\.)*" | '(?:[^'\\]|\\.)*')
       
   209             [^\s'"]*
       
   210         )+
       
   211     ) | \S+)
       
   212 """, re.VERBOSE)
   206 
   213 
   207 def smart_split(text):
   214 def smart_split(text):
   208     r"""
   215     r"""
   209     Generator that splits a string by spaces, leaving quoted phrases together.
   216     Generator that splits a string by spaces, leaving quoted phrases together.
   210     Supports both single and double quotes, and supports escaping quotes with
   217     Supports both single and double quotes, and supports escaping quotes with