src/cm/utils/comment_positioning.py
author gibus
Mon, 06 Aug 2012 13:42:15 +0200
changeset 454 b7a092a52eae
parent 365 a478cb9786fd
child 496 c3df46754007
permissions -rw-r--r--
Cleaned export.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     2
from difflib import SequenceMatcher
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     3
#from cm.utils.spannifier import Spannifier
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     4
import sys, operator
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     5
from cm.utils.spannifier import spannify
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     6
from cm.converters.pandoc_converters import pandoc_convert
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     7
import logging
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     8
from cm.utils.spannifier import get_the_soup
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
     9
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    10
import html5lib
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    11
from html5lib import treebuilders
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    12
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    13
def compute_new_comment_positions(old_content, old_format, new_content, new_format, commentList):
    """Re-anchor comments of an old text version onto a new version.

    Both versions are converted to HTML with ``pandoc_convert`` and run
    through ``spannify``; the two resulting character lists are diffed with
    ``difflib.SequenceMatcher``.  Each comment's (wrapper, offset) anchors are
    turned into absolute offsets in the old character list, shifted by the
    size change of every non-equal opcode located entirely before the
    comment, and finally converted back into (wrapper, offset) pairs relative
    to the new version.  A comment overlapped by a non-equal opcode is
    flagged invalid.

    Replies and comments whose scope is already removed are ignored (they
    appear in neither returned list).

    The comment objects are modified in place: ``initial_*_offset``,
    ``computed_*_offset`` and ``valid`` are set on them, and, for valid
    comments, ``start_wrapper``/``start_offset``/``end_wrapper``/``end_offset``
    are overwritten with the new anchors.

    Parameters:
        old_content, old_format: content and format of the previous version,
            as accepted by ``pandoc_convert``.
        new_content, new_format: content and format of the new version.
        commentList: iterable of comment objects exposing ``is_reply()``,
            ``is_scope_removed()``, ``start_wrapper``, ``start_offset``,
            ``end_wrapper`` and ``end_offset``.

    Returns:
        A 2-tuple ``(to_modify, to_remove)``: the list of comments that are
        still valid, and the list of comments that are not.
    """
    # cf. TextVersion.get_content
    previousVersionContent = pandoc_convert(old_content, old_format, 'html')
    newVersionContent = pandoc_convert(new_content, new_format, 'html')

    # span_starts_* are indexed by wrapper id; the value is used below as the
    # offset of that wrapper's first character in the matching char list.
    _, previous_char_list, span_starts_previous = spannify(previousVersionContent)
    _, new_char_list, span_starts_new = spannify(newVersionContent)

    opcodes = SequenceMatcher(None, previous_char_list, new_char_list).get_opcodes()

    # limit to real comments (not replies) and those that have scope
    commentList = [c for c in commentList if not c.is_reply() and not c.is_scope_removed()]

    for comment in commentList:
        try:
            comment.initial_start_offset = span_starts_previous[comment.start_wrapper] + comment.start_offset
            comment.initial_end_offset = span_starts_previous[comment.end_wrapper] + comment.end_offset
        except KeyError:
            logging.error('Key error (wrapper out of bounds of span_starts_previous)')
            # The comment cannot be located in the old version: flag it so
            # the loops below (which read ``valid``) skip it and it ends up
            # in the to_remove list instead of raising AttributeError.
            comment.valid = False
            continue

        comment.computed_start_offset = comment.initial_start_offset
        comment.computed_end_offset = comment.initial_end_offset
        comment.valid = True

    for tag, i1, i2, j1, j2 in opcodes:
        if tag == 'equal':
            continue

        # Size difference introduced by this opcode (new length - old length).
        delta = (j2 - j1) - (i2 - i1)

        for comment in commentList:
            if not comment.valid:
                continue

            if comment.initial_start_offset >= i2:
                # The change lies entirely before the comment: shift it.
                comment.computed_start_offset += delta
                comment.computed_end_offset += delta
            elif comment.initial_end_offset > i1:
                # The change overlaps the commented range.
                comment.valid = False

    for cc in commentList:
        if not cc.valid:
            continue

        for wrapper_id in xrange(len(span_starts_new)):
            start = span_starts_new.get(wrapper_id)
            # The last wrapper extends to the end of the text.
            end = span_starts_new.get(wrapper_id + 1, sys.maxint)

            # adjust start
            if cc.computed_start_offset >= start and cc.computed_start_offset < end:
                cc.start_wrapper = wrapper_id
                cc.start_offset = cc.computed_start_offset - start

            # adjust end
            if cc.computed_end_offset >= start and cc.computed_end_offset < end:
                cc.end_wrapper = wrapper_id
                cc.end_offset = cc.computed_end_offset - start

    # returns to_modify, to_remove
    return [c for c in commentList if c.valid], \
           [c for c in commentList if not c.valid]
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    84
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
    85
def add_marker(text, color, start_ids, end_ids, with_markers, with_colors):
    """Decorate a text fragment with comment markers and/or a background color.

    Parameters:
        text: the fragment to decorate.
        color: index into the background palette; 0 means "no coloring".
            Values past the end of the palette use the last (strongest)
            color.
        start_ids: ids for which an opening marker ``[id&gt;`` is put before
            the text, in the given order.
        end_ids: ids for which a closing marker ``&lt;id]`` is put after the
            text, in reverse order.  The list is not modified.
        with_markers: when true, add the opening/closing markers.
        with_colors: when true (and ``color`` is not 0), wrap the result in a
            ``<span>`` carrying the background color.

    Returns:
        The decorated fragment.
    """
    # TODO: this palette is copied from c-text.css (originally for testing
    # only) and should be obtained differently.
    BCKCOLORS = ['#FFF', '#FFF39A', '#FFDB9A', '#FFC39A', '#FFAB9A', '#FF879A', '#FF7B9A', '#FF6272']

    ret = text

    if with_markers:
        opening = ''.join(["[%s&gt;" % start_id for start_id in start_ids])
        # reversed() instead of end_ids.reverse(): do not mutate the caller's list.
        closing = ''.join(["&lt;%s]" % end_id for end_id in reversed(end_ids)])
        ret = "%s%s%s" % (opening, ret, closing)

    if with_colors and color != 0:
        # Clamp to the last palette entry rather than raising IndexError when
        # the index exceeds the palette.
        background = BCKCOLORS[min(color, len(BCKCOLORS) - 1)]

        # For some reasons, abiwords can read background style attribute but not background-color
        from cm.cm_settings import USE_ABI
        if USE_ABI:
            ret = "<span style='background:%s;'>%s</span>" % (background, ret)
        else:
            ret = "<span style='background-color:%s;'>%s</span>" % (background, ret)

    return ret
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   107
103
61fd17f9ab78 enh: detached comments
rbernard
parents: 58
diff changeset
   108
# NOTE: `comments` may contain both top-level comments and replies.
0
40c8f766c9b8 import from internal svn r 4007
raph
parents:
diff changeset
   109
def insert_comment_markers(htmlcontent, comments, with_markers, with_colors):
    """Insert comment markers and/or background colors into spannified HTML.

    The HTML is parsed with ``get_the_soup``.  For every wrapper element
    (looked up by id ``sv-<wrapper_id>``) touched by at least one comment,
    the first child of the element is cut at the comment start/end offsets
    and each piece is decorated by ``add_marker``.

    Parameters:
        htmlcontent: HTML whose wrapper elements have ids of the form
            ``sv-<n>``.
        comments: comment objects exposing ``is_reply()``, ``start_wrapper``,
            ``start_offset``, ``end_wrapper`` and ``end_offset``.  Replies
            are skipped; the remaining comments are numbered from 1 in
            iteration order and that number is what appears in the markers.
        with_markers: forwarded to ``add_marker``.
        with_colors: forwarded to ``add_marker``.

    Returns:
        The modified document, serialized with ``unicode()``.
    """
    html = get_the_soup(htmlcontent)

    # datas: { wrapper_id: {'start_color': nb_of_comments_unterminated_at_wrapper_start,
    #                       'offsets': {offset: [[numbers of comments starting at offset],
    #                                            [numbers of comments ending at offset]]}} }
    # Both lists are filled in the order comments are iterated, so the order
    # of `comments` (should be ('start_wrapper', 'start_offset',
    # 'end_wrapper', 'end_offset')) is important.
    datas = {}
    cpt = 1  # starting numbered comment
    for comment in comments:
        if comment.is_reply():
            continue

        # start
        wrapper_data = datas.setdefault(comment.start_wrapper, {'start_color': 0, 'offsets': {}})
        wrapper_data['offsets'].setdefault(comment.start_offset, [[], []])[0].append(cpt)

        # end
        wrapper_data = datas.setdefault(comment.end_wrapper, {'start_color': 0, 'offsets': {}})
        wrapper_data['offsets'].setdefault(comment.end_offset, [[], []])[1].append(cpt)

        # Every wrapper after the first one, up to and including the last
        # one, begins inside this comment.
        for wrapper_id in range(comment.start_wrapper + 1, comment.end_wrapper + 1):
            wrapper_data = datas.setdefault(wrapper_id, {'start_color': 0, 'offsets': {}})
            wrapper_data['start_color'] += 1

        cpt = cpt + 1

    for (wrapper_id, wrapper_data) in datas.items():
        start_color = wrapper_data['start_color']
        # order the offsets so the content is cut left to right
        offsets = sorted(wrapper_data['offsets'].items(), key=operator.itemgetter(0))

        d = html.find(id="sv-%d" % wrapper_id)
        if not d:  # comment detached
            continue
        if not d.contents:  # nothing inside the wrapper to decorate
            continue
        content = d.contents[0]

        spans = ""

        if offsets:
            color = start_color
            start = 0
            start_ids = []

            for offset, ids in offsets:
                # Piece between the previous cut and this one: opened by the
                # comments starting at the previous cut, closed by those
                # ending here.
                spans += add_marker(content[start:offset], color, start_ids, ids[1], with_markers, with_colors)

                start_ids = ids[0]
                start = offset

                # Comments starting here deepen the color, comments ending
                # here lighten it.
                color += (len(ids[0]) - len(ids[1]))

            # Remainder of the content after the last cut.
            spans += add_marker(content[start:], color, start_ids, [], with_markers, with_colors)
        else:  # the whole content is to be colored with start_color
            spans += add_marker(content, start_color, [], [], with_markers, with_colors)

        content.replaceWith(spans)

    return unicode(html)