src/cm/utils/spannifier.py
author Simon Descarpentries <sid@sopinspace.com>
Mon, 21 Oct 2013 16:37:07 +0200
changeset 553 bf26fb47a14c
parent 516 c6105d922ac6
permissions -rw-r--r--
To allow scrolling in Safari mobile, we set the content of the text_view_comments frame in a jQuery UI layout. So the automated scrolling operations in c_sync.js must be adjustable to the right part to scroll. Also, if a comment has to be shown outside of the current viewport, we scroll the correct part to that viewport and then set the comment's top Y offset to just what it needs to avoid the "Add comment" button after the scrolling operation. If not in Safari mobile, we add an offset here to prevent the comment from being displayed under the "Add comment" button.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
import uuid
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
import xml.dom.minidom
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
import re
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
from BeautifulSoup import BeautifulSoup, Comment
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     5
 
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
def get_text_nodes(soup):
    """
    Return the text nodes of ``soup``, leaving out HTML comments.

    ``soup`` is called with a ``text`` filter that rejects every node
    that is an instance of ``Comment``; whatever that call returns is
    handed back unchanged.
    """
    return soup(text=lambda text: not isinstance(text, Comment))
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     9
502
8ec189cc214d do not skip span for newline textnodes otherwise compute_new_comment_positions() will return bad results for pandoc texts.
gibus
parents: 473
diff changeset
    10
def is_real_text_node(textNode, nolinefeed=True):
    """
    Tell whether ``textNode`` should be wrapped in spans.

    A node is rejected when:
      - ``nolinefeed`` is true and the node's string is exactly "\\n";
      - the node has a ``style`` parent (CSS is not document text).

    Returns True for every other node.
    """
    # Lone newline nodes are skipped unless the caller asks to keep them.
    if nolinefeed and textNode.string == "\n":
        return False
    return not textNode.findParent('style')
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    14
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    15
def get_the_soup(input):
    """
    Parse ``input`` with BeautifulSoup and return the resulting soup.

    The parser is built with ``convertEntities=BeautifulSoup.ALL_ENTITIES``
    and ``fromEncoding='UTF-8'``.
    """
    return BeautifulSoup(
        input,
        convertEntities=BeautifulSoup.ALL_ENTITIES,
        fromEncoding='UTF-8'
    )
270
05a602160c58 cache spannify function
raph
parents: 0
diff changeset
    17
     
05a602160c58 cache spannify function
raph
parents: 0
diff changeset
    18
from cm.utils.cache import memoize, dj_memoize
05a602160c58 cache spannify function
raph
parents: 0
diff changeset
    19
@dj_memoize
def spannify(input, nolinefeed=True):
    """
    Wrap the text nodes of ``input`` in spans.

    Trailing whitespace is stripped from ``input`` before it is parsed.
    Every text node accepted by ``is_real_text_node`` is replaced by its
    own content wrapped in two nested spans whose ids are ``sv_<i>`` and
    ``sv-<i>``, where ``<i>`` is the index of the node among all text
    nodes returned by ``get_text_nodes`` (skipped nodes still consume an
    index).

    Returns a 3-tuple ``(output, textualized, span_starts)``:
      - ``output``: the serialized soup, with ``&quot;``, ``&amp;``,
        ``&gt;`` and ``&lt;`` expanded back to plain characters;
      - ``textualized``: the content of all wrapped text nodes, joined
        in order;
      - ``span_starts``: dict mapping a wrapped node's index to the
        offset in ``textualized`` at which its content starts.
    """
    input = re.sub(r"\s*$", "", input)

    soup = get_the_soup(input)
    textNodes = get_text_nodes(soup)

    textNodes_content = []
    span_starts = {}
    # Running length of the text collected so far; avoids re-joining
    # the whole list on every iteration just to measure it.
    offset = 0
    for i, textNode in enumerate(textNodes):
        if not is_real_text_node(textNode, nolinefeed):
            continue
        textNode.replaceWith('<span id="sv_' + str(i) + '" class="c-s"><span id="sv-' + str(i) + '" class="c-count-0 c-c">' + textNode.string + '</span></span>')
        content = textNode.string
        span_starts[i] = offset
        offset += len(content)
        textNodes_content.append(content)

    output = unicode(soup)
    # Soup has introduced HTML entities, which should be expanded.
    # NOTE(review): '&amp;' is expanded before '&gt;' and '&lt;', so a
    # literal '&amp;lt;' ends up as '<'. Order kept as-is on purpose;
    # confirm whether callers rely on it before changing.
    for entity, char in (("&quot;", '"'),
                         ("&amp;", '&'),
                         ("&gt;", '>'),
                         ("&lt;", '<')):
        output = re.sub(entity, char, output)

    textualized = ''.join(textNodes_content)
    return output, textualized, span_starts