equal
deleted
inserted
replaced
5 |
5 |
6 |
6 |
7 def get_text_nodes(soup): |
7 def get_text_nodes(soup): |
8 return soup(text=lambda text:not isinstance(text, Comment)) |
8 return soup(text=lambda text:not isinstance(text, Comment)) |
9 |
9 |
10 def is_real_text_node(textNode): |
10 def is_real_text_node(textNode, nolinefeed=True): |
11 if textNode.string == "\n": |
11 if nolinefeed and textNode.string == "\n": |
12 return False |
12 return False |
13 return not textNode.findParent('style') |
13 return not textNode.findParent('style') |
14 |
14 |
15 def get_the_soup(input): |
15 def get_the_soup(input): |
16 return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES) |
16 return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES) |
17 |
17 |
18 from cm.utils.cache import memoize, dj_memoize |
18 from cm.utils.cache import memoize, dj_memoize |
19 @dj_memoize |
19 @dj_memoize |
20 def spannify(input): |
20 def spannify(input, nolinefeed=True): |
21 """ |
21 """ |
22 wrap textNodes in spans |
22 wrap textNodes in spans |
23 """ |
23 """ |
24 |
24 |
25 input = re.sub("\s*$","",input) |
25 input = re.sub("\s*$","",input) |
30 textNodes_content = [] |
30 textNodes_content = [] |
31 |
31 |
32 span_starts = {} |
32 span_starts = {} |
33 for i in xrange(len(textNodes)): |
33 for i in xrange(len(textNodes)): |
34 textNode = textNodes[i] |
34 textNode = textNodes[i] |
35 if is_real_text_node(textNode) : |
35 if is_real_text_node(textNode, nolinefeed) : |
36 textNode.replaceWith('<span id="sv_' + str(i) + '" class="c-s"><span id="sv-' + str(i) + '" class="c-count-0 c-c">' + textNode.string + '</span></span>') |
36 textNode.replaceWith('<span id="sv_' + str(i) + '" class="c-s"><span id="sv-' + str(i) + '" class="c-count-0 c-c">' + textNode.string + '</span></span>') |
37 span_starts[i] = len(''.join(textNodes_content)) |
37 span_starts[i] = len(''.join(textNodes_content)) |
38 textNodes_content.append(textNode.string) |
38 textNodes_content.append(textNode.string) |
39 output = unicode(soup) |
39 output = unicode(soup) |
40 # Soup has introduced HTML entities, which should be expanded |
40 # Soup has introduced HTML entities, which should be expanded |