src/cm/utils/spannifier.py
changeset 502 8ec189cc214d
parent 473 cefe588b2a2b
child 516 c6105d922ac6
Legend: equal | deleted | inserted | replaced
501:5cd02f32be5e 502:8ec189cc214d
     5  
     5  
     6 
     6 
     7 def get_text_nodes(soup):
     7 def get_text_nodes(soup):
     8     return soup(text=lambda text:not isinstance(text, Comment))
     8     return soup(text=lambda text:not isinstance(text, Comment))
     9 
     9 
    10 def is_real_text_node(textNode):
    10 def is_real_text_node(textNode, nolinefeed=True):
    11     if textNode.string == "\n":
    11     if nolinefeed and textNode.string == "\n":
    12       return False
    12       return False
    13     return not textNode.findParent('style') 
    13     return not textNode.findParent('style') 
    14 
    14 
    15 def get_the_soup(input):
    15 def get_the_soup(input):
    16     return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES)
    16     return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES)
    17      
    17      
    18 from cm.utils.cache import memoize, dj_memoize
    18 from cm.utils.cache import memoize, dj_memoize
    19 @dj_memoize
    19 @dj_memoize
    20 def spannify(input):
    20 def spannify(input, nolinefeed=True):
    21     """ 
    21     """ 
    22     wrap textNodes in spans 
    22     wrap textNodes in spans 
    23     """
    23     """
    24     
    24     
    25     input = re.sub("\s*$","",input)
    25     input = re.sub("\s*$","",input)
    30     textNodes_content = []
    30     textNodes_content = []
    31     
    31     
    32     span_starts = {}
    32     span_starts = {}
    33     for i in xrange(len(textNodes)):
    33     for i in xrange(len(textNodes)):
    34         textNode = textNodes[i]
    34         textNode = textNodes[i]
    35         if is_real_text_node(textNode) :
    35         if is_real_text_node(textNode, nolinefeed) :
    36             textNode.replaceWith('<span id="sv_' + str(i) + '" class="c-s"><span id="sv-' + str(i) + '" class="c-count-0 c-c">' + textNode.string + '</span></span>')
    36             textNode.replaceWith('<span id="sv_' + str(i) + '" class="c-s"><span id="sv-' + str(i) + '" class="c-count-0 c-c">' + textNode.string + '</span></span>')
    37             span_starts[i] = len(''.join(textNodes_content))
    37             span_starts[i] = len(''.join(textNodes_content))
    38             textNodes_content.append(textNode.string)
    38             textNodes_content.append(textNode.string)
    39     output = unicode(soup)
    39     output = unicode(soup)
    40     # Soup has introduced HTML entities, which should be expanded
    40     # Soup has introduced HTML entities, which should be expanded