web/lib/django/utils/html.py
author ymh <ymh.work@gmail.com>
Wed, 20 Jan 2010 00:34:04 +0100
changeset 0 0d40e90630ef
permissions -rw-r--r--
Blinkster creation
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
"""HTML utilities suitable for global use."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
import re
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
import string
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
from django.utils.safestring import SafeData, mark_safe
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
from django.utils.encoding import force_unicode
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from django.utils.functional import allow_lazy
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from django.utils.http import urlquote
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
# Configuration for urlize() function.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
LEADING_PUNCTUATION  = ['(', '<', '&lt;']
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
TRAILING_PUNCTUATION = ['.', ',', ')', '>', '\n', '&gt;']
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
# List of possible strings used for bullets in bulleted lists.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
DOTS = ['&middot;', '*', '\xe2\x80\xa2', '&#149;', '&bull;', '&#8226;']
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
unencoded_ampersands_re = re.compile(r'&(?!(\w+|#\d+);)')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
word_split_re = re.compile(r'(\s+)')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
punctuation_re = re.compile('^(?P<lead>(?:%s)*)(?P<middle>.*?)(?P<trail>(?:%s)*)$' % \
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
    ('|'.join([re.escape(x) for x in LEADING_PUNCTUATION]),
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
    '|'.join([re.escape(x) for x in TRAILING_PUNCTUATION])))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
simple_email_re = re.compile(r'^\S+@[a-zA-Z0-9._-]+\.[a-zA-Z0-9._-]+$')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
link_target_attribute_re = re.compile(r'(<a [^>]*?)target=[^\s>]+')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
html_gunk_re = re.compile(r'(?:<br clear="all">|<i><\/i>|<b><\/b>|<em><\/em>|<strong><\/strong>|<\/?smallcaps>|<\/?uppercase>)', re.IGNORECASE)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
hard_coded_bullets_re = re.compile(r'((?:<p>(?:%s).*?[a-zA-Z].*?</p>\s*)+)' % '|'.join([re.escape(x) for x in DOTS]), re.DOTALL)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
trailing_empty_content_re = re.compile(r'(?:<p>(?:&nbsp;|\s|<br \/>)*?</p>\s*)+\Z')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
del x # Temporary variable
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
def escape(html):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
    Returns the given HTML with ampersands, quotes and angle brackets encoded.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
    return mark_safe(force_unicode(html).replace('&', '&amp;').replace('<', '&lt;').replace('>', '&gt;').replace('"', '&quot;').replace("'", '&#39;'))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
escape = allow_lazy(escape, unicode)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
def conditional_escape(html):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
    Similar to escape(), except that it doesn't operate on pre-escaped strings.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
    if isinstance(html, SafeData):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
        return html
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
    else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
        return escape(html)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
def linebreaks(value, autoescape=False):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
    """Converts newlines into <p> and <br />s."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
    value = re.sub(r'\r\n|\r|\n', '\n', force_unicode(value)) # normalize newlines
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
    paras = re.split('\n{2,}', value)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
    if autoescape:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
        paras = [u'<p>%s</p>' % escape(p).replace('\n', '<br />') for p in paras]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
    else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
        paras = [u'<p>%s</p>' % p.replace('\n', '<br />') for p in paras]
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
    return u'\n\n'.join(paras)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
linebreaks = allow_lazy(linebreaks, unicode)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
def strip_tags(value):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
    """Returns the given HTML with all tags stripped."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    59
    return re.sub(r'<[^>]*?>', '', force_unicode(value))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    60
strip_tags = allow_lazy(strip_tags)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
def strip_spaces_between_tags(value):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
    """Returns the given HTML with spaces between tags removed."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    64
    return re.sub(r'>\s+<', '><', force_unicode(value))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
strip_spaces_between_tags = allow_lazy(strip_spaces_between_tags, unicode)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
def strip_entities(value):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
    """Returns the given HTML with all entities (&something;) stripped."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
    return re.sub(r'&(?:\w+|#\d+);', '', force_unicode(value))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
strip_entities = allow_lazy(strip_entities, unicode)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    71
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    72
def fix_ampersands(value):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
    """Returns the given HTML with all unencoded ampersands encoded correctly."""
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
    return unencoded_ampersands_re.sub('&amp;', force_unicode(value))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
fix_ampersands = allow_lazy(fix_ampersands, unicode)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
def urlize(text, trim_url_limit=None, nofollow=False, autoescape=False):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
    Converts any URLs in text into clickable links.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
    Works on http://, https://, www. links and links ending in .org, .net or
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
    .com. Links can have trailing punctuation (periods, commas, close-parens)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
    and leading punctuation (opening parens) and it'll still do the right
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    84
    thing.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    86
    If trim_url_limit is not None, the URLs in link text longer than this limit
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
    will truncated to trim_url_limit-3 characters and appended with an elipsis.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    89
    If nofollow is True, the URLs in link text will get a rel="nofollow"
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    90
    attribute.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    91
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    92
    If autoescape is True, the link text and URLs will get autoescaped.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    93
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    94
    trim_url = lambda x, limit=trim_url_limit: limit is not None and (len(x) > limit and ('%s...' % x[:max(0, limit - 3)])) or x
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    95
    safe_input = isinstance(text, SafeData)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    96
    words = word_split_re.split(force_unicode(text))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    97
    nofollow_attr = nofollow and ' rel="nofollow"' or ''
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    98
    for i, word in enumerate(words):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    99
        match = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   100
        if '.' in word or '@' in word or ':' in word:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   101
            match = punctuation_re.match(word)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   102
        if match:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   103
            lead, middle, trail = match.groups()
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   104
            # Make URL we want to point to.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   105
            url = None
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   106
            if middle.startswith('http://') or middle.startswith('https://'):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   107
                url = urlquote(middle, safe='/&=:;#?+*')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   108
            elif middle.startswith('www.') or ('@' not in middle and \
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   109
                    middle and middle[0] in string.ascii_letters + string.digits and \
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   110
                    (middle.endswith('.org') or middle.endswith('.net') or middle.endswith('.com'))):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   111
                url = urlquote('http://%s' % middle, safe='/&=:;#?+*')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   112
            elif '@' in middle and not ':' in middle and simple_email_re.match(middle):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   113
                url = 'mailto:%s' % middle
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
                nofollow_attr = ''
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
            # Make link.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   116
            if url:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   117
                trimmed = trim_url(middle)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   118
                if autoescape and not safe_input:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   119
                    lead, trail = escape(lead), escape(trail)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   120
                    url, trimmed = escape(url), escape(trimmed)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   121
                middle = '<a href="%s"%s>%s</a>' % (url, nofollow_attr, trimmed)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   122
                words[i] = mark_safe('%s%s%s' % (lead, middle, trail))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   123
            else:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   124
                if safe_input:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   125
                    words[i] = mark_safe(word)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
                elif autoescape:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
                    words[i] = escape(word)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
        elif safe_input:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
            words[i] = mark_safe(word)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   130
        elif autoescape:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   131
            words[i] = escape(word)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   132
    return u''.join(words)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   133
urlize = allow_lazy(urlize, unicode)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   134
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   135
def clean_html(text):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   136
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   137
    Clean the given HTML.  Specifically, do the following:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   138
        * Convert <b> and <i> to <strong> and <em>.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   139
        * Encode all ampersands correctly.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   140
        * Remove all "target" attributes from <a> tags.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   141
        * Remove extraneous HTML, such as presentational tags that open and
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   142
          immediately close and <br clear="all">.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   143
        * Convert hard-coded bullets into HTML unordered lists.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   144
        * Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   145
          bottom of the text.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   146
    """
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   147
    from django.utils.text import normalize_newlines
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   148
    text = normalize_newlines(force_unicode(text))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   149
    text = re.sub(r'<(/?)\s*b\s*>', '<\\1strong>', text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   150
    text = re.sub(r'<(/?)\s*i\s*>', '<\\1em>', text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   151
    text = fix_ampersands(text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   152
    # Remove all target="" attributes from <a> tags.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   153
    text = link_target_attribute_re.sub('\\1', text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   154
    # Trim stupid HTML such as <br clear="all">.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   155
    text = html_gunk_re.sub('', text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   156
    # Convert hard-coded bullets into HTML unordered lists.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   157
    def replace_p_tags(match):
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
        s = match.group().replace('</p>', '</li>')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
        for d in DOTS:
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
            s = s.replace('<p>%s' % d, '<li>')
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   161
        return u'<ul>\n%s\n</ul>' % s
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
    text = hard_coded_bullets_re.sub(replace_p_tags, text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
    # Remove stuff like "<p>&nbsp;&nbsp;</p>", but only if it's at the bottom
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   164
    # of the text.
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
    text = trailing_empty_content_re.sub('', text)
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   166
    return text
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   167
clean_html = allow_lazy(clean_html, unicode)