| author | rbernard |
| Wed, 20 Jan 2010 20:43:38 +0100 | |
| changeset 103 | 61fd17f9ab78 |
| parent 58 | a480a91d63ca |
| child 172 | 9a355903350d |
| permissions | -rw-r--r-- |
| 0 | 1 |
# -*- coding: utf-8 -*- |
2 |
from difflib import SequenceMatcher |
|
3 |
#from cm.utils.spannifier import Spannifier |
|
4 |
import sys, operator |
|
5 |
from cm.utils.spannifier import spannify |
|
6 |
from cm.converters.pandoc_converters import pandoc_convert |
|
7 |
import logging |
|
8 |
from cm.utils.spannifier import get_the_soup |
|
9 |
||
10 |
import html5lib |
|
11 |
from html5lib import treebuilders |
|
12 |
||
13 |
def compute_new_comment_positions(old_content, old_format, new_content, new_format, commentList):
    """Re-anchor the comments of a previous text version onto a new version.

    Both contents are brought to HTML (through ``pandoc_convert`` unless the
    format is already ``'html'``) and handed to ``spannify``.  The two
    character lists it returns are diffed with ``difflib.SequenceMatcher``
    and every non-'equal' opcode is applied to the comments:

    * a change located entirely before a comment shifts the comment by the
      size difference of the change;
    * a change overlapping a comment invalidates the comment;
    * a change located after a comment leaves it untouched.

    Valid comments get their ``start_wrapper``/``start_offset`` and
    ``end_wrapper``/``end_offset`` attributes rewritten in place so that they
    point into the spans of the new version.

    Replies and comments whose scope was removed are not considered at all.

    NOTE(review): the third value returned by ``spannify`` is used as a
    mapping {wrapper id: offset of the wrapper's first character}, with ids
    numbered from 0 -- confirm against ``cm.utils.spannifier``.

    Returns a ``(to_modify, to_remove)`` pair of lists: the comments that
    could be relocated and those that could not.
    """
    if old_format != 'html':
        previous_html = pandoc_convert(old_content, old_format, 'html')
    else:
        previous_html = old_content

    if new_format != 'html':
        new_html = pandoc_convert(new_content, new_format, 'html')
    else:
        new_html = new_content

    _, previous_char_list, span_starts_previous = spannify(previous_html)
    _, new_char_list, span_starts_new = spannify(new_html)

    opcodes = SequenceMatcher(None, previous_char_list, new_char_list).get_opcodes()

    # limit to real comments (not replies) and those that have scope
    commentList = [c for c in commentList if not c.is_reply() and not c.is_scope_removed()]

    # Absolute offsets of every comment in the previous version.
    for comment in commentList:
        try:
            comment.initial_start_offset = span_starts_previous[comment.start_wrapper] + comment.start_offset
            comment.initial_end_offset = span_starts_previous[comment.end_wrapper] + comment.end_offset
        except KeyError:
            logging.error('Key error (wrapper out of bounds of span_starts_previous)')
            # The comment cannot be located in the previous version, so it
            # cannot be relocated either.  Flag it explicitly: the loops below
            # read `valid` on every comment and would otherwise hit a missing
            # (or stale) attribute.
            comment.valid = False
            continue

        comment.computed_start_offset = comment.initial_start_offset
        comment.computed_end_offset = comment.initial_end_offset
        comment.valid = True

    # Apply every modification of the diff to the comments.
    for tag, i1, i2, j1, j2 in opcodes:
        if tag == 'equal':
            continue

        # Size difference introduced by this opcode.
        delta = (j2 - j1) - (i2 - i1)

        for comment in commentList:
            if not comment.valid:
                continue

            if comment.initial_start_offset >= i2:
                # change located before the comment: shift it
                comment.computed_start_offset += delta
                comment.computed_end_offset += delta
            elif comment.initial_end_offset > i1:
                # change overlaps the comment: scope is lost
                comment.valid = False

    # Translate the absolute offsets back to (wrapper, offset in wrapper).
    span_count = len(span_starts_new)
    for comment in commentList:
        if not comment.valid:
            continue

        for span_id in range(span_count):
            start = span_starts_new.get(span_id)
            # the last span is open-ended
            end = span_starts_new.get(span_id + 1, float('inf'))

            # adjust start
            if start <= comment.computed_start_offset < end:
                comment.start_wrapper = span_id
                comment.start_offset = comment.computed_start_offset - start

            # adjust end
            if start <= comment.computed_end_offset < end:
                comment.end_wrapper = span_id
                comment.end_offset = comment.computed_end_offset - start

    # returns to_modify, to_remove
    to_modify = [c for c in commentList if c.valid]
    to_remove = [c for c in commentList if not c.valid]
    return to_modify, to_remove
|
90 |
||
91 |
## no colors, just markers |
|
92 |
#def insert_comment_markers_and_nocolors(htmlcontent, comments): |
|
93 |
# |
|
94 |
# parser = html5lib.HTMLParser(tree=treebuilders.getTreeBuilder("beautifulsoup")) |
|
95 |
# html = parser.parse(htmlcontent.encode("utf8"), encoding="utf8") |
|
96 |
# |
|
97 |
# cpt = 1 |
|
98 |
# |
|
99 |
# # key : node id, value : indexes of added markers |
|
100 |
# # to remember index of inserted markers |
|
101 |
# rememberMarkerOffsets = {} |
|
102 |
# |
|
103 |
# #O(n²) ? |
|
104 |
# for comment in comments : |
|
105 |
# for i in [0,1] : |
|
106 |
# wrapper = comment.start_wrapper if i == 0 else comment.end_wrapper |
|
107 |
# offset = comment.start_offset if i == 0 else comment.end_offset |
|
108 |
# marker = "[%d>"%cpt if i == 0 else "<%d]"%cpt |
|
109 |
# marker_length = len(marker) |
|
110 |
# content = html.find(id = "sv-%d"%wrapper).contents[0] |
|
111 |
## import pdb;pdb.set_trace() |
|
112 |
# smallerIndexes = rememberMarkerOffsets.get(wrapper, []) |
|
113 |
# original_offset = offset |
|
114 |
# offset += marker_length * len([index for index in smallerIndexes if index <= offset]) |
|
115 |
# |
|
116 |
# smallerIndexes.append(original_offset) |
|
117 |
# rememberMarkerOffsets[wrapper] = smallerIndexes |
|
118 |
# |
|
119 |
# content.replaceWith(content[:offset]+marker+content[offset:]) |
|
120 |
# |
|
121 |
# cpt = cpt + 1 |
|
122 |
# |
|
123 |
# return unicode(html) |
|
124 |
||
125 |
def add_marker(text, color, start_ids, end_ids, with_markers, with_colors):
    """Decorate ``text`` with comment markers and/or a background colour.

    text         -- the text fragment to decorate
    color        -- index in the background palette; 0 means "no colour"
    start_ids    -- ids rendered as ``[id>`` in front of the text, in order
    end_ids      -- ids rendered as ``<id]`` after the text, in reverse order
                    (the list itself is left untouched)
    with_markers -- when true, add the ``[id>`` / ``<id]`` markers
    with_colors  -- when true and ``color`` is not 0, wrap the result in a
                    ``<span>`` carrying the background colour

    Returns the decorated string.
    """
    # TODO
    # THE PALETTE IS REALLY JUST FOR TESTING: IT IS COPIED FROM C-TEXT.CSS AND SHOULD BE DONE DIFFERENTLY
    BCKCOLORS = ['#FFF', '#FFF39A', '#FFDB9A', '#FFC39A', '#FFAB9A', '#FF879A', '#FF7B9A', '#FF6272']
    # pad with the last colour so that deeper nesting levels keep it
    BCKCOLORS = BCKCOLORS + ['#FF6272'] * 30

    ret = text

    if with_markers:
        opening = ''.join(["[%s>" % start_id for start_id in start_ids])
        # reversed() instead of list.reverse(): do not modify the caller's list
        closing = ''.join(["<%s]" % end_id for end_id in reversed(end_ids)])
        ret = "%s%s%s" % (opening, ret, closing)

    if with_colors and color != 0:
        # saturate at the last colour instead of failing on very deep nesting
        background = BCKCOLORS[min(color, len(BCKCOLORS) - 1)]
        ret = "<span style='background-color:%s;'>%s</span>" % (background, ret)

    return ret
|
142 |
||
| 103 | 143 |
# comments are comments and replies : |
| 0 | 144 |
def insert_comment_markers(htmlcontent, comments, with_markers, with_colors) :
    """Insert comment markers and/or background colours into ``htmlcontent``.

    htmlcontent  -- HTML whose commentable wrappers carry the ids ``sv-<n>``
    comments     -- comments and replies; replies are skipped
    with_markers -- forwarded to ``add_marker`` (``[n>`` / ``<n]`` markers)
    with_colors  -- forwarded to ``add_marker`` (background colour spans)

    Non-reply comments are numbered from 1 in the order they are given; that
    number is what appears in the markers.  Inside a wrapper the colour index
    of a fragment is the number of comments open on that fragment.

    Only the first child of each ``sv-<n>`` element is rewritten.

    NOTE(review): the decorated fragments are put back into the tree as one
    string; how the tree returned by ``get_the_soup`` serialises the markup
    and special HTML characters it contains is not visible here -- confirm.

    Returns the modified document as a unicode string.
    """
    html = get_the_soup(htmlcontent)

    # { wrapper_id : {'start_color': nb_of_comments_unterminated_at_wrapper_start,
    #                 'offsets': {offset: [[numbers of comments starting at offset],
    #                                      [numbers of comments ending at offset]]}} }
    # Both lists of an offset keep the order in which comments were given.
    datas = {}

    def wrapper_entry(wrapper_id):
        # fetch the record of a wrapper, creating it on first use
        return datas.setdefault(wrapper_id, {'start_color': 0, 'offsets': {}})

    cpt = 1 # starting numbered comment
    for comment in comments :
        if comment.is_reply() :
            continue

        # start
        starting = wrapper_entry(comment.start_wrapper)['offsets'].setdefault(comment.start_offset, [[], []])
        starting[0].append(cpt)

        # end
        ending = wrapper_entry(comment.end_wrapper)['offsets'].setdefault(comment.end_offset, [[], []])
        ending[1].append(cpt)

        # the comment is still open at the beginning of every wrapper that
        # follows its start wrapper, up to and including its end wrapper
        for cc in range(comment.start_wrapper + 1, comment.end_wrapper + 1) :
            wrapper_entry(cc)['start_color'] += 1

        cpt = cpt + 1

    for (wrapper_id, wrapper_data) in datas.items() :
        offsets = sorted(wrapper_data['offsets'].items(), key=operator.itemgetter(0))

        content = html.find(id = "sv-%d"%wrapper_id).contents[0]

        # Cut the content at every offset; each fragment is decorated with
        # the markers opening at its left bound and closing at its right
        # bound.  Without any offset the whole content is a single fragment
        # coloured with start_color.
        color = wrapper_data['start_color']
        start = 0
        start_ids = []
        fragments = []

        for offset, ids in offsets :
            fragments.append(add_marker(content[start:offset], color, start_ids, ids[1], with_markers, with_colors))

            start_ids = ids[0]
            start = offset
            color += (len(ids[0]) - len(ids[1]))

        # trailing fragment: nothing closes at the end of the wrapper
        fragments.append(add_marker(content[start:], color, start_ids, [], with_markers, with_colors))

        content.replaceWith("".join(fragments))

    return unicode(html)
|
223 |
||
224 |
#def output_comment_line(comment) : |
|
225 |
# ret = "<tr>" |
|
226 |
# for i in range(comment.depth()) : |
|
227 |
# ret = ret + """<td width="1 em"></td>""" |
|
228 |
# |
|
229 |
# ret = ret + """<td width="1 em">[%d]</td><td>""" |
|
230 |
# |
|
231 |
# |
|
232 |
#def output_comments(comments) : |
|
233 |
# |
|
234 |
# max_depth = max([comment.depth() for comment in comments]) |
|
235 |
# top_comments = [comment for comment in comments if comment.reply_to_id == None] |
|
236 |
# top_comment_cpt = 0 |
|
237 |
# html_comments = "" |
|
238 |
# |
|
239 |
# for top_comment in top_comments : |
|
240 |
# html_comments = html_comments + """<table>""" |
|
241 |
# |
|
242 |
# html_comments = html_comments + "<table><tr>" |
|
243 |
# |
|
244 |
# html_comments = html_comments + "</table>" |
|
245 |
# |
|
246 |
# top_comment_cpt = top_comment_cpt + 1 |
|
247 |
# |
|
248 |
# ret = "%s%s%s"%("""<div class="pagebreakhere">""", html_comments, """</div>""") |
|
249 |
# return ret |