|
1 # -*- coding: utf-8 -*- |
|
2 from difflib import SequenceMatcher |
|
3 #from cm.utils.spannifier import Spannifier |
|
4 import sys, operator |
|
5 from cm.utils.spannifier import spannify |
|
6 from cm.converters.pandoc_converters import pandoc_convert |
|
7 import logging |
|
8 from cm.utils.spannifier import get_the_soup |
|
9 |
|
10 import html5lib |
|
11 from html5lib import treebuilders |
|
12 |
|
def compute_new_comment_positions(old_content, old_format, new_content, new_format, commentList):
    """Re-anchor comments of a previous text version onto a new version.

    Both versions are converted to HTML with ``pandoc_convert`` unless their
    format already is ``'html'``, then passed through ``spannify``.  The
    character lists it returns are diffed with ``difflib.SequenceMatcher``
    and every non-reply comment is either shifted by the length change of
    the edits located before it, or flagged invalid when an edit overlaps
    the commented range.

    Parameters:
        old_content / old_format: content and format name of the previous
            version.
        new_content / new_format: content and format name of the new
            version.
        commentList: iterable of comment objects exposing ``is_reply()``,
            ``start_wrapper``, ``start_offset``, ``end_wrapper`` and
            ``end_offset``.

    Side effects:
        Each non-reply comment gets a ``valid`` flag and, when its wrappers
        are known, ``initial_*_offset`` / ``computed_*_offset`` attributes.
        Valid comments have ``start_wrapper``, ``start_offset``,
        ``end_wrapper`` and ``end_offset`` rewritten for the new version.

    Returns:
        A ``(to_modify, to_remove)`` tuple of lists: the comments still
        valid after the diff, and the comments that are not.  Replies are
        in neither list.
    """
    if old_format != 'html':
        previous_html = pandoc_convert(old_content, old_format, 'html')
    else:
        previous_html = old_content

    if new_format != 'html':
        new_html = pandoc_convert(new_content, new_format, 'html')
    else:
        new_html = new_content

    # The third value returned by spannify is used below as a mapping
    # wrapper id -> absolute offset of the wrapper start in the char list.
    _, previous_char_list, span_starts_previous = spannify(previous_html)
    _, new_char_list, span_starts_new = spannify(new_html)

    opcodes = SequenceMatcher(None, previous_char_list, new_char_list).get_opcodes()

    # limit to real comments (not replies)
    top_level = [c for c in commentList if not c.is_reply()]

    for comment in top_level:
        try:
            initial_start = span_starts_previous[comment.start_wrapper] + comment.start_offset
            initial_end = span_starts_previous[comment.end_wrapper] + comment.end_offset
        except KeyError:
            logging.error('Key error (wrapper out of bounds of span_starts_previous)')
            # Without this flag the later `comment.valid` reads would hit
            # a missing (or stale) attribute: a comment whose anchor cannot
            # be located cannot be repositioned, so report it as invalid.
            comment.valid = False
            continue

        comment.initial_start_offset = initial_start
        comment.initial_end_offset = initial_end
        comment.computed_start_offset = initial_start
        comment.computed_end_offset = initial_end
        comment.valid = True

    for tag, i1, i2, j1, j2 in opcodes:
        if tag == 'equal':
            # unchanged run: neither shifts nor invalidates anything
            continue

        # net growth (or shrinkage, when negative) introduced by this edit
        delta = (j2 - j1) - (i2 - i1)

        for comment in top_level:
            if not comment.valid:
                continue

            if comment.initial_start_offset >= i2:
                # edit lies entirely before the comment: shift both ends
                comment.computed_start_offset += delta
                comment.computed_end_offset += delta
            elif comment.initial_end_offset > i1:
                # edit overlaps the commented range
                comment.valid = False

    # Upper bound used for the last wrapper, which has no successor.
    # float('inf') compares correctly with ints and, unlike sys.maxint,
    # also exists on Python 3.
    unbounded = float('inf')
    wrapper_ids = range(len(span_starts_new))

    for comment in top_level:
        if not comment.valid:
            continue

        for wrapper_id in wrapper_ids:
            start = span_starts_new.get(wrapper_id)
            end = span_starts_new.get(wrapper_id + 1, unbounded)

            # adjust start
            if start <= comment.computed_start_offset < end:
                comment.start_wrapper = wrapper_id
                comment.start_offset = comment.computed_start_offset - start

            # adjust end
            if start <= comment.computed_end_offset < end:
                comment.end_wrapper = wrapper_id
                comment.end_offset = comment.computed_end_offset - start

    # returns to_modify, to_remove
    to_modify = [c for c in top_level if c.valid]
    to_remove = [c for c in top_level if not c.valid]
    return to_modify, to_remove
|
90 |
|
91 ## no colors, just markers |
|
92 #def insert_comment_markers_and_nocolors(htmlcontent, comments): |
|
93 # |
|
94 # parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("beautifulsoup")) |
|
95 # html = parser.parse(htmlcontent.encode("utf8"), encoding="utf8") |
|
96 # |
|
97 # cpt = 1 |
|
98 # |
|
99 # # key : node id, value : indexes of added markers |
|
100 # # to remember index of inserted markers |
|
101 # rememberMarkerOffsets = {} |
|
102 # |
|
103 # #O(n²) ? |
|
104 # for comment in comments : |
|
105 # for i in [0,1] : |
|
106 # wrapper = comment.start_wrapper if i == 0 else comment.end_wrapper |
|
107 # offset = comment.start_offset if i == 0 else comment.end_offset |
|
108 # marker = "[%d>"%cpt if i == 0 else "<%d]"%cpt |
|
109 # marker_length = len(marker) |
|
110 # content = html.find(id = "sv-%d"%wrapper).contents[0] |
|
111 ## import pdb;pdb.set_trace() |
|
112 # smallerIndexes = rememberMarkerOffsets.get(wrapper, []) |
|
113 # original_offset = offset |
|
114 # offset += marker_length * len([index for index in smallerIndexes if index <= offset]) |
|
115 # |
|
116 # smallerIndexes.append(original_offset) |
|
117 # rememberMarkerOffsets[wrapper] = smallerIndexes |
|
118 # |
|
119 # content.replaceWith(content[:offset]+marker+content[offset:]) |
|
120 # |
|
121 # cpt = cpt + 1 |
|
122 # |
|
123 # return unicode(html) |
|
124 |
|
def add_marker(text, color, start_ids, end_ids, with_markers, with_colors):
    """Decorate a text fragment with comment markers and/or a background color.

    Parameters:
        text: the fragment to decorate.
        color: index into the background palette; ``0`` means "no color".
            Indexes past the end of the palette use its last shade.
        start_ids: ids rendered as ``[id>`` before the text, in order.
        end_ids: ids rendered as ``<id]`` after the text, in reverse order.
            The list itself is left untouched.
        with_markers: when true, add the ``[id>`` / ``<id]`` markers.
        with_colors: when true and ``color != 0``, wrap the result in a
            ``<span>`` carrying the background color.

    Returns:
        The decorated string (``text`` itself when nothing applies).
    """
    # TODO
    # THIS PALETTE IS REALLY JUST FOR TESTING, IT IS COPIED FROM C-TEXT.CSS AND SHOULD BE DONE DIFFERENTLY
    palette = ['#FFF', '#FFF39A', '#FFDB9A', '#FFC39A', '#FFAB9A', '#FF879A', '#FF7B9A', '#FF6272']
    palette += ['#FF6272'] * 30

    ret = text

    if with_markers:
        opening = ''.join(["[%s>" % start_id for start_id in start_ids])
        # reversed() instead of end_ids.reverse(): the caller's list must
        # not be reordered as a side effect of rendering.
        closing = ''.join(["<%s]" % end_id for end_id in reversed(end_ids)])
        ret = "%s%s%s" % (opening, ret, closing)

    if with_colors and color != 0:
        # clamp so that a very high overlap count reuses the darkest shade
        # instead of raising IndexError
        shade = palette[min(color, len(palette) - 1)]
        ret = "<span style='background-color:%s;'>%s</span>" % (shade, ret)

    return ret
|
142 |
|
143 # comments are comments and replies : TODO $$$$$$$$$$$$ handle replies case |
|
def insert_comment_markers(htmlcontent, comments, with_markers, with_colors):
    """Insert comment markers and/or background colors into spannified HTML.

    Non-reply comments are numbered from 1 in iteration order; replies are
    skipped (TODO: handle the replies case).  For every wrapper touched by
    a comment, the element with id ``sv-<wrapper_id>`` is looked up in the
    parsed document and its first child is replaced by the decorated text
    built with ``add_marker``.

    Parameters:
        htmlcontent: HTML source, parsed with ``get_the_soup``.
        comments: iterable of comment objects exposing ``is_reply()``,
            ``start_wrapper``, ``start_offset``, ``end_wrapper`` and
            ``end_offset``.  The order matters: it drives the numbering
            and the order of markers sharing an offset.  An empty iterable
            leaves the document untouched.
        with_markers: forwarded to ``add_marker`` (emit ``[n>`` / ``<n]``).
        with_colors: forwarded to ``add_marker`` (emit colored spans).

    Returns:
        The modified document converted with ``unicode()``.
    """
    html = get_the_soup(htmlcontent)

    # { wrapper_id: {'start_color': number of comments still open when the
    #                                wrapper starts,
    #                'offsets': {offset: [[numbers of the comments starting
    #                                      at offset],
    #                                     [numbers of the comments ending
    #                                      at offset]]}} }
    # Both inner lists follow the order of `comments`.
    datas = {}

    def wrapper_entry(wrapper_id):
        # fetch the bookkeeping record of a wrapper, creating it on demand
        return datas.setdefault(wrapper_id, {'start_color': 0, 'offsets': {}})

    cpt = 1  # number given to the next non-reply comment
    for comment in comments:
        if comment.is_reply():
            continue

        # start
        start_entry = wrapper_entry(comment.start_wrapper)
        start_entry['offsets'].setdefault(comment.start_offset, [[], []])[0].append(cpt)

        # end
        end_entry = wrapper_entry(comment.end_wrapper)
        end_entry['offsets'].setdefault(comment.end_offset, [[], []])[1].append(cpt)

        # every wrapper after the first one, up to and including the last
        # one, begins inside this comment
        for wrapper_id in range(comment.start_wrapper + 1, comment.end_wrapper + 1):
            wrapper_entry(wrapper_id)['start_color'] += 1

        cpt += 1

    for wrapper_id, wrapper_data in datas.items():
        offsets = sorted(wrapper_data['offsets'].items(), key=operator.itemgetter(0))

        # NOTE(review): assumes the wrapper element exists and that its
        # first child is the text to decorate - confirm against spannify.
        content = html.find(id="sv-%d" % wrapper_id).contents[0]

        color = wrapper_data['start_color']
        pieces = []
        start = 0
        start_ids = []

        for offset, ids in offsets:
            # the fragment before `offset` opens with the comments started
            # at the previous boundary and closes with those ending here
            pieces.append(add_marker(content[start:offset], color, start_ids, ids[1], with_markers, with_colors))

            start_ids = ids[0]
            start = offset
            color += len(ids[0]) - len(ids[1])

        # trailing fragment; when the wrapper has no offsets at all this is
        # the whole content, colored with start_color
        pieces.append(add_marker(content[start:], color, start_ids, [], with_markers, with_colors))

        content.replaceWith("".join(pieces))

    return unicode(html)
|
223 |
|
224 #def output_comment_line(comment) : |
|
225 # ret = "<tr>" |
|
226 # for i in range(comment.depth()) : |
|
227 # ret = ret + """<td width="1 em"></td>""" |
|
228 # |
|
229 # ret = ret + """<td width="1 em">[%d]</td><td>""" |
|
230 # |
|
231 # |
|
232 #def output_comments(comments) : |
|
233 # |
|
234 # max_depth = max([comment.depth() for comment in comments]) |
|
235 # top_comments = [comment for comment in comments if comment.reply_to_id == None] |
|
236 # top_comment_cpt = 0 |
|
237 # html_comments = "" |
|
238 # |
|
239 # for top_comment in top_comments : |
|
240 # html_comments = html_comments + """<table>""" |
|
241 # |
|
242 # html_comments = html_comments + "<table><tr>" |
|
243 # |
|
244 # html_comments = html_comments + "</table>" |
|
245 # |
|
246 # top_comment_cpt = top_comment_cpt + 1 |
|
247 # |
|
248 # ret = "%s%s%s"%("""<div class="pagebreakhere">""", html_comments, """</div>""") |
|
249 # return ret |