src/cm/utils/html.py
author rbernard
Fri, 12 Feb 2010 21:59:18 +0100
changeset 173 c1aa800029d6
parent 154 8cb89c153140
child 175 4f072edc51a1
permissions -rw-r--r--
renaming variable
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
"""
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
Package to manipulate html chunks
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
"""
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     5
from BeautifulSoup import BeautifulSoup, Comment
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
def surrond_text_node(html_chunk, start_html, end_html):
    """
    Surround the non-blank text nodes of html_chunk with start_html/end_html.

    html_chunk is parsed with BeautifulSoup. Every text node returned by
    get_text_nodes() whose content is not made only of whitespace is
    replaced by the concatenation start_html + <node text> + end_html.
    Whitespace-only text nodes are left untouched.

    Returns the modified document converted with unicode().
    """
    soup = BeautifulSoup(html_chunk)
    for text_node in get_text_nodes(soup):
        content = text_node.string
        # Skip nodes that contain nothing but whitespace.
        if not content.strip():
            continue
        text_node.replaceWith(start_html + content + end_html)
    return unicode(soup)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    17
    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    18
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    19
# utilities    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    20
def get_text_nodes(soup):
    """
    Return the text nodes of soup, leaving out Comment instances.

    soup is called with a ``text`` filter that accepts any node which is
    not an instance of Comment; whatever that call returns is handed back
    unchanged.
    """
    return soup(text=lambda text: not isinstance(text, Comment))