src/cm/utils/html.py
author Production Moz <dev@sopinspace.com>
Tue, 15 May 2012 14:15:34 +0200
changeset 435 96c16cc6408b
parent 175 4f072edc51a1
permissions -rw-r--r--
Fixed comments filter by name when name is empty
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
"""
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
Package to manipulate html chunks
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
"""
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
175
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
     5
from string_utils import strip_cr
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
from BeautifulSoup import BeautifulSoup, Comment
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     8
def surrond_text_node(html_chunk, start_html, end_html):
    """
    Wrap each non-blank text node of html_chunk between start_html and end_html.

    html_chunk is parsed with BeautifulSoup and its text nodes are collected
    with get_text_nodes. Every node whose text is not empty after strip()
    is replaced by start_html + text + end_html; nodes holding only
    whitespace are left as they are.

    Returns the modified tree rendered with unicode().
    """
    tree = BeautifulSoup(html_chunk)
    for node in get_text_nodes(tree):
        content = node.string
        if not content.strip():
            #whitespace-only text is kept untouched
            continue
        node.replaceWith(start_html + content + end_html)
    return unicode(tree)
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    18
    
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    19
175
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
    20
#utilities    
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    21
def get_text_nodes(soup):
    """
    Return the text nodes found in soup, leaving out Comment instances.

    soup is called with a text= predicate, so the result is whatever
    that call returns for the nodes accepted by the predicate.
    """
    def is_not_comment(node):
        #Comment nodes are rejected, every other text node is accepted
        return not isinstance(node, Comment)

    return soup(text=is_not_comment)
175
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
    23
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
    24
#WARNING behavior changed also for markdown. but really shouldn't hurt 20100212
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
    25
#it is text as received from textarea
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
    26
def on_content_receive(txt, format):
    """
    Clean text received from a textarea by passing it through strip_cr.

    txt is the raw text; format is accepted but not used, the same
    cleaning is applied whatever the format. Returns the result of
    strip_cr(txt) (presumably txt without carriage returns - confirm
    in string_utils).
    """
    #textarea content isn't packaged the same way on windows IE and linux FF,
    #so it shouldn't hurt to clean it for any format
    cleaned = strip_cr(txt)
    return cleaned
4f072edc51a1 BUG FIX : handling html
rbernard
parents: 154
diff changeset
    29