src/cm/utils/html.py
author raph
Sat, 06 Feb 2010 18:04:01 +0100
changeset 151 150cb6e76f30
parent 0 40c8f766c9b8
child 154 8cb89c153140
permissions -rw-r--r--
add permission check on text compare
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
"""
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
Package to manipulate html chunks
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
"""
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     5
from BeautifulSoup import BeautifulSoup, Comment
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
def surrond_text_node(html_chunk, start_html, end_html):
    """
    Wrap each non-blank text node of html_chunk between two markers.

    html_chunk is parsed with BeautifulSoup and its text nodes are collected
    with get_text_nodes. Every node whose text is not only whitespace is
    replaced by start_html + text + end_html; whitespace-only nodes are kept
    as they are.

    Returns the resulting document converted with unicode().
    """
    soup = BeautifulSoup(html_chunk)
    for node in get_text_nodes(soup):
        content = node.string
        # Blank nodes (indentation, line breaks between tags) stay unwrapped.
        if not content.strip():
            continue
        node.replaceWith(start_html + content + end_html)
    return unicode(soup)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    17
    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    18
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    19
# utilities    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    20
def get_text_nodes(soup):
    """
    Return the text nodes of soup, leaving out Comment nodes.

    soup is called with a text filter, so the result is whatever that call
    returns for the nodes accepted by the filter.
    """
    def is_plain_text(node):
        # Comment nodes are text nodes too; reject them explicitly.
        return not isinstance(node, Comment)

    return soup(text=is_plain_text)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    22
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    23
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    24
import re
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    25
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    26
def cleanup_textarea(input):
    r"""
    Normalize the line endings of input to "\n".

    Each "\r\n" pair and each lone "\r" is replaced by a single "\n";
    existing "\n" characters are left as they are.

    input -- the text to normalize (the name shadows the builtin but is kept
             so that callers passing it by keyword keep working)

    Returns a new string with the normalized line endings.
    """
    # "\r\n?" consumes a carriage return together with the line feed that
    # may follow it, so a "\r\n" pair yields one "\n" rather than two.
    return re.sub(r'\r\n?', '\n', input)