src/cm/utils/comment_positioning.py
changeset 0 40c8f766c9b8
child 58 a480a91d63ca
equal deleted inserted replaced
-1:000000000000 0:40c8f766c9b8
       
     1 # -*- coding: utf-8 -*-
       
     2 from difflib import SequenceMatcher
       
     3 #from cm.utils.spannifier import Spannifier
       
     4 import sys, operator
       
     5 from cm.utils.spannifier import spannify
       
     6 from cm.converters.pandoc_converters import pandoc_convert
       
     7 import logging
       
     8 from cm.utils.spannifier import get_the_soup
       
     9 
       
    10 import html5lib
       
    11 from html5lib import treebuilders
       
    12 
       
    13 def compute_new_comment_positions(old_content, old_format, new_content, new_format, commentList):
       
    14     
       
    15     if old_format!='html':
       
    16         previousVersionContent = pandoc_convert(old_content, old_format, 'html')
       
    17     else:
       
    18         previousVersionContent = old_content
       
    19         
       
    20     if new_format != 'html':
       
    21         newVersionContent = pandoc_convert(new_content, new_format, 'html')
       
    22     else:
       
    23         newVersionContent = new_content
       
    24     
       
    25     _, previous_char_list, span_starts_previous = spannify(previousVersionContent)
       
    26     _, new_char_list, span_starts_new = spannify(newVersionContent)
       
    27     
       
    28     sm = SequenceMatcher(None, previous_char_list, new_char_list)
       
    29     
       
    30     opcodes = sm.get_opcodes()
       
    31     to_remove_comments_ids = set()
       
    32     
       
    33     # limit to real comments (not replies)
       
    34     commentList = [c for c in commentList if not c.is_reply()]
       
    35     
       
    36     for comment in commentList:
       
    37         try:
       
    38             comment.initial_start_offset = span_starts_previous[comment.start_wrapper] + comment.start_offset
       
    39             comment.initial_end_offset = span_starts_previous[comment.end_wrapper] + comment.end_offset
       
    40         except KeyError:
       
    41             logging.error('Key error (wrapper out of bounds of span_starts_previous)')
       
    42             continue
       
    43 
       
    44         comment.computed_start_offset = comment.initial_start_offset
       
    45         comment.computed_end_offset = comment.initial_end_offset
       
    46 
       
    47 #        comment.computed_start_wrapper = None
       
    48 #        comment.computed_end_wrapper = None
       
    49 
       
    50         comment.valid = True
       
    51     for tag, i1, i2, j1, j2 in opcodes:
       
    52         #print tag, i1, i2, j1, j2
       
    53         
       
    54         for i in xrange(len(commentList)) :            
       
    55             if tag != 'equal' :
       
    56                 comment = commentList[i]
       
    57                 if not comment.valid:
       
    58                     continue
       
    59                 
       
    60                 if comment.initial_start_offset >= i2 :
       
    61                     # if offset
       
    62                     delta = ((j2 - j1) - (i2 - i1))
       
    63                     comment.computed_start_offset += delta
       
    64                     comment.computed_end_offset += delta
       
    65                     
       
    66                 elif comment.initial_end_offset > i1:
       
    67                     comment.valid = False
       
    68                     
       
    69         #    id, initial_start, initial_end, computed_start, computed_end, valid = self.computationResults[i]
       
    70 
       
    71     for c in commentList:        
       
    72         if c.valid:
       
    73             for id in xrange(len(span_starts_new.keys())):
       
    74                 start = span_starts_new.get(id)
       
    75                 end = span_starts_new.get(id+1, sys.maxint)
       
    76 
       
    77                 # adjust start                
       
    78                 if c.computed_start_offset >= start and c.computed_start_offset < end:
       
    79                     c.start_wrapper = id
       
    80                     c.start_offset = c.computed_start_offset - start
       
    81                 
       
    82                 # adjust end                        
       
    83                 if c.computed_end_offset >= start and c.computed_end_offset < end:
       
    84                     c.end_wrapper = id
       
    85                     c.end_offset = c.computed_end_offset - start
       
    86             
       
    87     # returns to_modify, to_remove
       
    88     return [c for c in commentList if c.valid], \
       
    89            [c for c in commentList if not c.valid]
       
    90 
       
    91 ## no colors, just markers           
       
    92 #def insert_comment_markers_and_nocolors(htmlcontent, comments):
       
    93 #    
       
    94 #    parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("beautifulsoup"))
       
    95 #    html = parser.parse(htmlcontent.encode("utf8"), encoding="utf8")
       
    96 #    
       
    97 #    cpt = 1
       
    98 #    
       
    99 #    # key : node id, value : indexes of added markers
       
   100 #    # to remember index of inserted markers
       
   101 #    rememberMarkerOffsets = {}
       
   102 #    
       
   103 #    #O(n²) ?
       
   104 #    for comment in comments :
       
   105 #        for i in [0,1] :
       
   106 #            wrapper = comment.start_wrapper if i == 0 else comment.end_wrapper
       
   107 #            offset =  comment.start_offset if i == 0 else comment.end_offset
       
   108 #            marker = "[%d>"%cpt if i == 0 else "<%d]"%cpt
       
   109 #            marker_length = len(marker)
       
   110 #            content = html.find(id = "sv-%d"%wrapper).contents[0]
       
   111 ##            import pdb;pdb.set_trace()
       
   112 #            smallerIndexes = rememberMarkerOffsets.get(wrapper, [])
       
   113 #            original_offset =  offset
       
   114 #            offset += marker_length * len([index for index in smallerIndexes if index <= offset])
       
   115 #        
       
   116 #            smallerIndexes.append(original_offset)
       
   117 #            rememberMarkerOffsets[wrapper] = smallerIndexes
       
   118 #        
       
   119 #            content.replaceWith(content[:offset]+marker+content[offset:])
       
   120 #                
       
   121 #        cpt = cpt + 1
       
   122 #    
       
   123 #    return unicode(html)
       
   124 
       
   125 def add_marker(text, color, start_ids, end_ids, with_markers, with_colors):
       
   126 # TODO
       
   127 # THESE 3 LINES ARE REALLY JUST FOR TESTING THIS IS COPIED FROM C-TEXT.CSS AND SHOULD BE DONE DIFFERENTLY
       
   128     BCKCOLORS = ['#FFF', '#FFF39A', '#FFDB9A', '#FFC39A', '#FFAB9A', '#FF879A', '#FF7B9A', '#FF6272']
       
   129     for i in range(30) :
       
   130         BCKCOLORS.append('#FF6272')
       
   131 
       
   132     ret = text 
       
   133     
       
   134     if with_markers:
       
   135         end_ids.reverse()
       
   136         ret = "%s%s%s"%(''.join(["[%s>"%start_id for start_id in start_ids]), ret, ''.join(["<%s]"%end_id for end_id in end_ids]))
       
   137      
       
   138     if with_colors and color != 0 :
       
   139         ret = "<span style='background-color:%s;'>%s</span>"%(BCKCOLORS[color], ret)
       
   140         
       
   141     return ret
       
   142 
       
    143 # NOTE: `comments` contains both comments and replies. TODO: handle the replies case.
       
   144 def insert_comment_markers(htmlcontent, comments, with_markers, with_colors) :
       
   145 
       
   146 #    parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("beautifulsoup"))
       
   147 #    html = parser.parse(htmlcontent.encode("utf8"), encoding="utf8")
       
   148     html = get_the_soup(htmlcontent) ;
       
   149     
       
   150 #    import pdb;pdb.set_trace()
       
   151     
       
   152     max_wrapper = max([comment.end_wrapper for comment in comments])
       
   153     min_wrapper = min([comment.start_wrapper for comment in comments])
       
   154     
       
   155     datas = {} # { wrapper_id : {'start_color':nb_of_comments_unterminated_at_wrapper_start, 'offsets':{offset: [[ids of wrappers starting at offset], [ids of wrappers ending at offset]]}}
       
   156     # datas['offsets'][someoffset][0] and idem[1] will be ordered the way comments are (should be ('start_wrapper', 'start_offset', 'end_wrapper', 'end_offset') important)
       
   157     cpt = 1 # starting numbered comment
       
   158     for comment in comments :
       
   159         if comment.is_reply() :
       
   160             continue ;
       
   161         
       
   162         #import pdb;pdb.set_trace()
       
   163         # start 
       
   164         wrapper_data = datas.get(comment.start_wrapper, {'start_color':0, 'offsets':{}})
       
   165         offset = wrapper_data.get('offsets').get(comment.start_offset, [[],[]])
       
   166         offset[0].append(cpt)
       
   167         #offset[0].append(comment.id)
       
   168         wrapper_data['offsets'][comment.start_offset] = offset
       
   169         datas[comment.start_wrapper] = wrapper_data
       
   170             
       
   171         # end 
       
   172         wrapper_data = datas.get(comment.end_wrapper, {'start_color':0, 'offsets':{}})
       
   173         offset = wrapper_data.get('offsets').get(comment.end_offset, [[],[]])
       
   174         offset[1].append(cpt)
       
   175         #offset[1].append(comment.id)
       
   176         wrapper_data['offsets'][comment.end_offset] = offset
       
   177         datas[comment.end_wrapper] = wrapper_data
       
   178             
       
   179         for cc in range(comment.start_wrapper + 1, comment.end_wrapper + 1) : 
       
   180             wrapper_data = datas.get(cc, {'start_color':0, 'offsets':{}})
       
   181             wrapper_data['start_color'] += 1
       
   182             datas[cc] = wrapper_data
       
   183 
       
   184         cpt = cpt + 1
       
   185     
       
   186     # order ee values
       
   187     for (wrapper_id, wrapper_data) in datas.items() :
       
   188         start_color = wrapper_data['start_color']
       
   189         offsets = sorted(wrapper_data['offsets'].items(), key=operator.itemgetter(0))
       
   190         
       
   191         content = html.find(id = "sv-%d"%wrapper_id).contents[0]
       
   192         
       
   193         spans = ""
       
   194         
       
   195         if offsets :
       
   196             color = start_color
       
   197             
       
   198             start = 0
       
   199             start_ids = []
       
   200             end_ids = []
       
   201             
       
   202 #            for offset, nbs in offsets :
       
   203             for offset, ids in offsets :
       
   204                 
       
   205                 end_ids = ids[1]
       
   206                 end = offset
       
   207                 
       
   208                 spans += add_marker(content[start:end], color, start_ids, end_ids, with_markers, with_colors)
       
   209 
       
   210                 start_ids = ids[0]
       
   211                 start = end
       
   212 
       
   213                 color += (len(ids[0]) - len(ids[1]))
       
   214                 
       
   215             end_ids = []
       
   216             spans += add_marker(content[end:], color,start_ids, end_ids, with_markers, with_colors)
       
   217         else : # the whole content is to be colored with start_color
       
   218             spans += add_marker(content, start_color, [], [], with_markers, with_colors)
       
   219 
       
   220         content.replaceWith(spans)
       
   221 
       
   222     return unicode(html)
       
   223 
       
   224 #def output_comment_line(comment) :
       
   225 #    ret = "<tr>"
       
   226 #    for i in range(comment.depth()) : 
       
   227 #        ret = ret + """<td width="1 em"></td>"""
       
   228 #    
       
   229 #    ret = ret + """<td width="1 em">[%d]</td><td>"""
       
   230 #    
       
   231 #
       
   232 #def output_comments(comments) :
       
   233 #
       
   234 #    max_depth = max([comment.depth() for comment in comments])
       
   235 #    top_comments = [comment for comment in comments if comment.reply_to_id == None]
       
   236 #    top_comment_cpt = 0
       
   237 #    html_comments = ""
       
   238 #    
       
   239 #    for top_comment in top_comments :
       
   240 #        html_comments = html_comments + """<table>"""
       
   241 #         
       
   242 #        html_comments = html_comments + "<table><tr>"
       
   243 #         
       
   244 #        html_comments = html_comments + "</table>" 
       
   245 #
       
   246 #        top_comment_cpt = top_comment_cpt + 1
       
   247 #    
       
   248 #    ret = "%s%s%s"%("""<div class="pagebreakhere">""", html_comments, """</div>""")
       
   249 #    return ret