src/cm/utils/comment_positioning.py
author Simon Descarpentries <sid@sopinspace.com>
Wed, 09 Apr 2014 16:57:53 +0200
changeset 632 f8733ce4ed05
parent 533 c7aed0340a00
permissions -rw-r--r--
Fix another typo in this template : 'tag_list>'
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
from difflib import SequenceMatcher
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
#from cm.utils.spannifier import Spannifier
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
import sys, operator
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     5
from cm.utils.spannifier import spannify
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
from cm.converters.pandoc_converters import pandoc_convert
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
import logging
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     8
from cm.utils.spannifier import get_the_soup
533
c7aed0340a00 For some reason, when exporting commented text with background colors, angle brackets for html spans are sometimes returned as entities by soup.
gibus
parents: 519
diff changeset
     9
import re
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    10
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    11
import html5lib
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    12
from html5lib import treebuilders
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    13
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    14
def compute_new_comment_positions(old_content, old_format, new_content, new_format, commentList):
    """Re-anchor comments after a text has been edited.

    Both versions are converted to HTML and "spannified"; the resulting
    character lists are diffed and every comment's absolute offsets are
    shifted by the edits located before it. A comment whose commented range
    is touched by an edit is flagged invalid.

    Parameters:
        old_content, old_format: previous text and its markup format
            (passed as-is to ``pandoc_convert``).
        new_content, new_format: edited text and its markup format.
        commentList: iterable of comment objects. Replies and comments whose
            scope was removed are ignored. The remaining objects are mutated
            in place (``valid``, ``initial_*_offset``, ``computed_*_offset``
            and, when valid, ``start_wrapper``/``start_offset``/
            ``end_wrapper``/``end_offset``).

    Returns:
        A ``(to_modify, to_remove)`` tuple of lists: comments that could be
        re-anchored, and comments that could not.
    """
    # cf. TextVersion.get_content
    previousVersionContent = pandoc_convert(old_content, old_format, 'html')
    newVersionContent = pandoc_convert(new_content, new_format, 'html')

    # spannify() is used here for its 2nd and 3rd results: the character list
    # and a mapping {wrapper id: absolute offset at which the wrapper starts}.
    _, previous_char_list, span_starts_previous = spannify(previousVersionContent, False)
    _, new_char_list, span_starts_new = spannify(newVersionContent, False)

    opcodes = SequenceMatcher(None, previous_char_list, new_char_list).get_opcodes()

    # limit to real comments (not replies) and those that have scope
    commentList = [c for c in commentList if not c.is_reply() and not c.is_scope_removed()]

    # 1. Turn (wrapper, offset) pairs into absolute offsets in the old text.
    for comment in commentList:
        try:
            comment.initial_start_offset = span_starts_previous[comment.start_wrapper] + comment.start_offset
            comment.initial_end_offset = span_starts_previous[comment.end_wrapper] + comment.end_offset
        except KeyError:
            logging.error('Key error (wrapper out of bounds of span_starts_previous)')
            # The comment cannot be located in the previous version: flag it
            # explicitly so that the loops below (which read ``valid``) do
            # not fail on a missing or stale attribute, and so that it is
            # reported in the to_remove list.
            comment.valid = False
            continue

        comment.computed_start_offset = comment.initial_start_offset
        comment.computed_end_offset = comment.initial_end_offset
        comment.valid = True

    # 2. Replay every edit of the diff on the comments.
    for tag, i1, i2, j1, j2 in opcodes:
        if tag == 'equal':
            continue

        # Net number of characters added (or removed, if negative) by the edit.
        delta = (j2 - j1) - (i2 - i1)

        for comment in commentList:
            if not comment.valid:
                continue

            if comment.initial_start_offset >= i2:
                # Edit located entirely before the comment: shift it.
                comment.computed_start_offset += delta
                comment.computed_end_offset += delta
            elif comment.initial_end_offset > i1:
                # Edit overlaps the commented range: the comment is lost.
                comment.valid = False

    # 3. Convert absolute offsets back to (wrapper, offset) in the new text.
    wrapper_count = len(span_starts_new)
    for cc in commentList:
        if not cc.valid:
            continue

        for wrapper_id in range(wrapper_count):
            start = span_starts_new.get(wrapper_id, 0)
            # The last wrapper has no successor: it extends "to infinity".
            end = span_starts_new.get(wrapper_id + 1, sys.maxsize)

            # adjust start
            if start <= cc.computed_start_offset < end:
                cc.start_wrapper = wrapper_id
                cc.start_offset = cc.computed_start_offset - start

            # adjust end
            if start <= cc.computed_end_offset < end:
                cc.end_wrapper = wrapper_id
                cc.end_offset = cc.computed_end_offset - start

    # returns to_modify, to_remove
    return [c for c in commentList if c.valid], \
           [c for c in commentList if not c.valid]
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    85
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    86
# TODO: this palette is copied from c-text.css and should come from a single
# shared source instead of being duplicated here.
# The last shade is repeated so that larger color indexes still map to a color.
_BCKCOLORS = (
    '#ffffff', '#ffffa8', '#fff6a1', '#ffeb99', '#ffde91', '#ffd08a',
    '#ffc182', '#ffaf7a', '#ff9d73', '#ff896b', '#ff7363', '#ff5c5c',
) + ('#ff5c5c',) * 14


def add_marker(text, color, start_ids, end_ids, with_markers, with_colors):
    """Decorate a piece of text with comment markers and/or a background color.

    Parameters:
        text: the text fragment to decorate.
        color: index into the background palette; 0 means "no color".
            Indexes beyond the palette use its last (darkest) shade.
        start_ids: ids rendered as ``[id&gt;`` before the text, in order.
        end_ids: ids rendered as ``&lt;id]`` after the text, in reverse
            order. The list itself is left untouched.
        with_markers: when true, add the start/end markers.
        with_colors: when true (and ``color`` is not 0), wrap the result in a
            ``<span>`` carrying the background color.

    Returns:
        The decorated text.
    """
    ret = text

    if with_markers:
        opening = ''.join(["[%s&gt;" % start_id for start_id in start_ids])
        # reversed() instead of list.reverse(): do not mutate the caller's list.
        closing = ''.join(["&lt;%s]" % end_id for end_id in reversed(end_ids)])
        ret = "%s%s%s" % (opening, ret, closing)

    if with_colors and color != 0:
        # For some reasons, abiwords can read background style attribute but not background-color
        from cm.cm_settings import USE_ABI
        css_property = 'background' if USE_ABI else 'background-color'
        # Clamp so that a large index cannot raise an IndexError.
        shade = _BCKCOLORS[min(color, len(_BCKCOLORS) - 1)]
        ret = "<span style='%s:%s;'>%s</span>" % (css_property, shade, ret)

    return ret
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   108
103
61fd17f9ab78 enh: detached comments
rbernard
parents: 58
diff changeset
   109
# comments are comments and replies :
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   110
def insert_comment_markers(htmlcontent, comments, with_markers, with_colors):
    """Insert comment markers and/or background colors into spannified HTML.

    Parameters:
        htmlcontent: HTML whose text is wrapped in elements with ids of the
            form ``sv-<wrapper id>``.
        comments: comments and replies; replies are skipped. Each remaining
            comment is numbered from 1 in iteration order and must expose
            ``start_wrapper``, ``start_offset``, ``end_wrapper`` and
            ``end_offset``.
        with_markers, with_colors: forwarded to ``add_marker``.

    Returns:
        The decorated HTML as a unicode string, with the ``&quot;``,
        ``&amp;``, ``&gt;`` and ``&lt;`` entities expanded once.
    """
    html = get_the_soup(htmlcontent)

    # { wrapper_id : {'start_color': nb_of_comments_unterminated_at_wrapper_start,
    #                 'offsets': {offset: [[ids of comments starting at offset],
    #                                      [ids of comments ending at offset]]}} }
    # The id lists are ordered the way comments are.
    datas = {}

    def wrapper_entry(wrapper_id):
        # Fetch (creating it on first use) the record of a wrapper.
        return datas.setdefault(wrapper_id, {'start_color': 0, 'offsets': {}})

    cpt = 1  # starting numbered comment
    for comment in comments:
        if comment.is_reply():
            continue

        # start
        start_offsets = wrapper_entry(comment.start_wrapper)['offsets']
        start_offsets.setdefault(comment.start_offset, [[], []])[0].append(cpt)

        # end
        end_offsets = wrapper_entry(comment.end_wrapper)['offsets']
        end_offsets.setdefault(comment.end_offset, [[], []])[1].append(cpt)

        # Every wrapper after the first one, up to and including the last
        # one, begins inside the comment.
        for wrapper_id in range(comment.start_wrapper + 1, comment.end_wrapper + 1):
            wrapper_entry(wrapper_id)['start_color'] += 1

        cpt += 1

    for wrapper_id, wrapper_data in datas.items():
        d = html.find(id="sv-%d" % wrapper_id)
        if not d:  # comment detached
            continue
        if not d.contents:  # empty wrapper: nothing to decorate
            continue
        content = d.contents[0]

        color = wrapper_data['start_color']
        start = 0
        start_ids = []
        pieces = []

        # Walk the offsets in increasing order, emitting the text located
        # between two consecutive offsets with the markers that surround it.
        offsets = sorted(wrapper_data['offsets'].items(), key=operator.itemgetter(0))
        for offset, (opening_ids, closing_ids) in offsets:
            pieces.append(add_marker(content[start:offset], color, start_ids,
                                     closing_ids, with_markers, with_colors))
            start_ids = opening_ids
            start = offset
            color += len(opening_ids) - len(closing_ids)

        # Remaining text (the whole content when the wrapper has no offset).
        pieces.append(add_marker(content[start:], color, start_ids, [],
                                 with_markers, with_colors))

        content.replaceWith(''.join(pieces))

    output = unicode(html)
    # Soup has introduced HTML entities, which should be expanded.
    # This is done in a single pass so that an escaped ampersand followed by
    # "lt;" or "gt;" is not unescaped twice into a bare angle bracket.
    entities = {'quot': '"', 'amp': '&', 'gt': '>', 'lt': '<'}
    output = re.sub(r"&(quot|amp|gt|lt);",
                    lambda match: entities[match.group(1)],
                    output)
    return unicode(output)