#
# See LICENCE for detail
# Copyright (c) 2014 IRI
#
|
import bisect
import datetime
import json
import logging
import os
import re
import sys
import uuid

from dateutil.parser import parse as parse_date_raw
from dateutil.tz import tzutc

from lxml import etree

import requests

|
# Module-level logger; AnnotationsSynchronizer uses it unless another logger
# is supplied to its constructor.
logger = logging.getLogger(__name__)

# Channel names. ANNOTATION_CHANNEL has the same value as
# AnnotationsSynchronizer.DEFAULT_ANNOTATION_CHANNEL.
PIANOROLL_CHANNEL = 'PIANOROLL'
ANNOTATION_CHANNEL = 'ANNOT'

|
try:  # Python 2
    _string_types = basestring  # noqa: F821
    _text_type = unicode  # noqa: F821
except NameError:  # Python 3
    _string_types = str
    _text_type = str


class AnnotationsSynchronizer(object):
    """Download annotations from a REST service and store them in an LDT/IRI project.

    The annotations are fetched page by page from ``annot_url`` and appended,
    as ``<element>`` nodes, to an ``<ensemble>`` whose id starts with
    ``annot_``.  The project document is read from (and written back to)
    either a local XML file or the LDT platform REST API.
    """

    LDT_CONTENT_REST_API_PATH = "api/ldt/1.0/contents/"
    LDT_PROJECT_REST_API_PATH = "api/ldt/1.0/projects/"
    DEFAULT_ANNOTATION_CHANNEL = 'ANNOT'

    def __init__(self, start_date=None, end_date=None, duration=None, content_file=None,
                 content_file_write=None, project_id=None,
                 channels=(DEFAULT_ANNOTATION_CHANNEL,), events=(), annot_url=None,
                 user_whitelist=None, post_param=None, deltas=None,
                 base_url="http://ldt.iri.centrepompidou.fr/ldtplatform/",
                 content=None, content_id=None, video=None,
                 replace=True, merge=False, name="", batch_size=500,
                 filename="project.ldt", color="16763904", logger=None):
        """Configure an export.

        :param start_date: datetime, or anything whose ``str()`` can be parsed
            as a date; lower bound of the ``ts`` filter and origin of the
            relative timecodes.
        :param end_date: same types as ``start_date``; upper bound of the
            ``ts`` filter.  When omitted it is derived from ``start_date``
            and ``duration``, or from the duration returned by the content
            REST API.
        :param duration: duration added to ``start_date``, in seconds.
            NOTE(review): when the duration is fetched from the API it is
            stored in ``self.duration`` as returned and divided by 1000
            (i.e. treated as milliseconds) -- the attribute therefore holds
            two different units depending on its origin.
        :param content_file: path or http(s) URL of the project to update.
        :param content_file_write: path or http(s) URL to write to; defaults
            to the project URL when ``project_id`` is given.
        :param project_id: LDT project id; overrides ``content_file`` with
            the project REST URL built from ``base_url``.
        :param channels: channel names for the ``channel`` filter.
        :param events: event codes for the ``event_code`` filter.
        :param annot_url: URL of the annotation REST endpoint.
        :param user_whitelist: path of a text file holding one user name per
            line, used for the ``user`` filter.
        :param post_param: dict (or JSON string of a dict) sent as query
            parameters with the LDT platform requests.
        :param deltas: list of ``(timestamp, offset)`` pairs, in milliseconds,
            used by :meth:`find_delta`; assumed sorted by timestamp.
        :param base_url: root URL of the LDT platform.
        :param content: ``src`` attribute of the media of a new project.
        :param content_id: content id, used for a new project and for the
            duration lookup.
        :param video: ``video`` attribute of the media of a new project.
        :param replace: remove existing ``annot_*`` ensembles first.
        :param merge: append to an existing ``annot_*`` ensemble if any.
        :param name: title/abstract of a newly created decoupage.
        :param batch_size: number of annotations requested per page.
        :param filename: output file used when ``content_file_write`` does
            not designate an existing file or a URL.
        :param color: ``color`` attribute of the generated elements.
        :param logger: logger to use; defaults to this module's logger.
        """
        self.logger = logger if logger is not None else logging.getLogger(__name__)
        self.base_url = base_url.rstrip("/") + "/" if base_url else base_url
        self.deltas = deltas

        self.post_param = {}
        if isinstance(post_param, _string_types):
            self.post_param = json.loads(post_param)
        elif post_param:
            self.post_param = post_param

        self.start_date = self._coerce_date(start_date)
        self.duration = duration
        self.end_date = self._coerce_date(end_date)

        if self.end_date is None and self.start_date:
            if self.duration:
                self.end_date = self.start_date + datetime.timedelta(seconds=self.duration)
            elif self.base_url and content_id:
                # No explicit duration: ask the platform for the content's one.
                content_url = (self.base_url + AnnotationsSynchronizer.LDT_CONTENT_REST_API_PATH
                               + content_id + "/?format=json")
                self.logger.debug("get duration %s", content_url)
                r = requests.get(content_url, params=self.post_param)
                self.logger.debug("get duration resp %r", r)
                r.raise_for_status()
                self.duration = int(r.json()['duration'])
                self.logger.debug("get duration %r", self.duration)
                self.end_date = self.start_date + datetime.timedelta(seconds=self.duration // 1000)

        if self.end_date and self.deltas:
            # Extend the window by the offset of the last delta.
            self.end_date = self.end_date + datetime.timedelta(milliseconds=self.deltas[-1][1])

        self.content_file = content_file
        self.project_id = project_id
        if self.project_id is not None:
            self.content_file = (self.base_url + AnnotationsSynchronizer.LDT_PROJECT_REST_API_PATH
                                 + self.project_id + "/?format=json")
        self.content_file_write = content_file_write
        if self.content_file_write is None and self.project_id:
            self.content_file_write = self.content_file

        self.channels = list(set(channels))
        self.annot_url = annot_url
        self.events = list(set(events))

        self.user_whitelist_file = user_whitelist
        self.user_whitelist = None
        if self.user_whitelist_file:
            with open(self.user_whitelist_file, 'r') as f:
                self.user_whitelist = list(set(line.strip() for line in f))

        self.content = content
        self.content_id = content_id
        self.video = video
        self.replace = replace
        self.merge = merge
        self.name = name
        self.batch_size = batch_size
        self.filename = filename
        self.color = color

    def _coerce_date(self, value):
        """Return *value* as a datetime (parsing it if needed), or None when it is falsy."""
        if not value:
            return None
        if isinstance(value, datetime.datetime):
            return value
        return self.parse_date(str(value))

    @staticmethod
    def _is_http_url(location):
        """Tell whether *location* is an http(s) URL rather than a file path."""
        return location.startswith(("http://", "https://"))

    def parse_date(self, datestr):
        """Parse *datestr* with dateutil; a result without timezone is set to UTC."""
        res = parse_date_raw(datestr)
        if res.tzinfo is None:
            res = res.replace(tzinfo=tzutc())
        return res

    def find_delta(self, ts):
        """Return the last ``(timestamp, offset)`` delta located at or before *ts*.

        ``(0, 0)`` is returned when there is no delta or when *ts* precedes
        all of them.  ``self.deltas`` must be sorted for the bisection to be
        meaningful.
        """
        if self.deltas:
            i = bisect.bisect_right(self.deltas, (ts + 1, 0))
            if i:
                return self.deltas[i - 1]
        return (0, 0)

    def get_filter(self):
        """Build the list of filter clauses sent to the annotation service."""
        res = []
        if self.start_date:
            res.append({'name': 'ts', 'op': ">=", 'val': self.start_date.isoformat()})
        if self.end_date:
            res.append({'name': 'ts', 'op': "<=", 'val': self.end_date.isoformat()})
        if self.events:
            res.append({'name': 'event_code', 'op': "in", 'val': self.events})
        if self.channels:
            res.append({'name': 'channel', 'op': "in", 'val': self.channels})
        if self.user_whitelist:
            res.append({'name': 'user', 'op': "in", 'val': self.user_whitelist})
        return res

    def build_annotation_iterator(self, params):
        """Yield every annotation returned by ``annot_url`` for *params*.

        Pages are requested one after the other until ``total_pages`` (read
        from each response) is reached.  Iteration stops, with a warning, on
        the first response whose status is not 200.
        """
        query = dict(params)  # do not alter the caller's dict
        page = 0
        page_nb = 1
        while page < page_nb:
            page += 1
            query['page'] = page
            resp = requests.get(self.annot_url, params=query,
                                headers={'Content-Type': 'application/json'})
            if resp.status_code != requests.codes.ok:
                self.logger.warning("build_annotation_iterator : request %s : return code %r ",
                                    resp.request.url, resp.status_code)
                return
            resp_json = resp.json()
            page_nb = resp_json.get('total_pages', 1)
            for item in resp_json.get('objects', []):
                # TODO: add progress log
                yield item

    def _load_document(self):
        """Read ``content_file``; return ``(root, project)``.

        ``project`` is the JSON payload when the document comes from the REST
        API, else None.  ``root`` is None when there is nothing to read.
        """
        if not self.content_file:
            return None, None

        if self._is_http_url(self.content_file):
            self.logger.debug("url : %s", self.content_file)
            r = requests.get(self.content_file, params=self.post_param)
            self.logger.debug("url response %r content %r", r, r.text)
            project = r.json()
            ldt = project['ldt']
            # Parse what follows the XML declaration, when there is one.
            text_match = re.match(r"\<\?\s*xml.*?\?\>(.*)", ldt, re.I | re.S)
            return etree.fromstring(text_match.group(1) if text_match else ldt), project

        if os.path.exists(self.content_file):
            return etree.parse(self.content_file).getroot(), None

        return None, None

    def _create_document(self):
        """Create an empty project; return ``(root, ensemble_parent)``."""
        root = etree.Element(u"iri")
        etree.SubElement(root, u"project", {
            u"abstract": u"Annotations", u"title": u"Annotations",
            u"user": u"IRI Web", u"id": _text_type(uuid.uuid4())})
        medias = etree.SubElement(root, u"medias")
        etree.SubElement(medias, u"media", {
            u"pict": u"", u"src": _text_type(self.content),
            u"video": _text_type(self.video), u"id": _text_type(self.content_id),
            u"extra": u""})
        annotations = etree.SubElement(root, u"annotations")
        content = etree.SubElement(annotations, u"content", {u"id": _text_type(self.content_id)})
        return root, content

    def _find_ensemble_parent(self, root):
        """Locate where ensembles live in *root*.

        Return ``(ensemble_parent, display_content_node)``; both are None
        when the document is neither an ldt nor an iri file.
        """
        file_type = None
        for node in root:
            if node.tag == "project":
                file_type = "ldt"
                break
            if node.tag == "head":
                file_type = "iri"
                break

        if file_type == "ldt":
            annotations_node = root.find(u"annotations")
            if annotations_node is None:
                annotations_node = etree.SubElement(root, u"annotations")
            content_node = annotations_node.find(u"content")
            if content_node is None:
                # The content id is borrowed from the first media.
                media_nodes = root.xpath("//media")
                if not media_nodes:
                    return None, None
                content_node = etree.SubElement(annotations_node, u"content",
                                                id=media_nodes[0].get(u"id"))
            content_id = content_node.get(u"id")
            # The id is passed as an XPath variable so that quotes cannot
            # break the expression.
            display_nodes = root.xpath("//displays/display/content[@id=$content_id]",
                                       content_id=content_id or u"")
            if not display_nodes:
                self.logger.info("No display node found. Will not update display")
                return content_node, None
            return content_node, display_nodes[0]

        if file_type == "iri":
            body_node = root.find(u"body")
            if body_node is None:
                body_node = etree.SubElement(root, u"body")
            ensembles_node = body_node.find(u"ensembles")
            if ensembles_node is None:
                ensembles_node = etree.SubElement(body_node, u"ensembles")
            return ensembles_node, None

        return None, None

    def _remove_generated_ensembles(self, ensemble_parent, display_content_node):
        """Drop every ``annot_*`` ensemble and the display entries pointing to it."""
        # Iterate over copies: removing a node while walking its siblings
        # would cut the iteration short.
        for ens in list(ensemble_parent.iterchildren(tag=u"ensemble")):
            ens_id = ens.get("id", "")
            if not ens_id.startswith("annot_"):
                continue
            ensemble_parent.remove(ens)
            if display_content_node is None:
                continue
            for cut_display in list(display_content_node.iterchildren()):
                if cut_display.get('idens', '') == ens_id:
                    display_content_node.remove(cut_display)

    def _select_ensemble(self, ensemble_parent):
        """Return ``(ensemble, decoupage, elements)``, reusing an existing
        ``annot_*`` ensemble when merging, creating a new one otherwise."""
        ensemble = None
        decoupage = None
        elements = None

        if self.merge:
            for ens in ensemble_parent.findall(u"ensemble"):
                if ens.get('id', "").startswith("annot_"):
                    ensemble = ens
                    break
            if ensemble is not None:
                elements = ensemble.find(u".//elements")
                decoupage = ensemble.find(u"decoupage")

        if ensemble is None or elements is None:
            ensemble = etree.SubElement(ensemble_parent, u"ensemble", {
                u"id": u"annot_" + _text_type(uuid.uuid4()),
                u"title": u"Ensemble Annotation", u"author": u"IRI Web",
                u"abstract": u"Ensemble Annotation"})
            decoupage = etree.SubElement(ensemble, u"decoupage", {
                u"id": _text_type(uuid.uuid4()), u"author": u"IRI Web"})
            etree.SubElement(decoupage, u"title").text = _text_type(self.name)
            etree.SubElement(decoupage, u"abstract").text = _text_type(self.name)
            elements = etree.SubElement(decoupage, u"elements")

        return ensemble, decoupage, elements

    def _append_annotation(self, elements, annot, annot_ts, begin):
        """Add *annot* to *elements* at timecode *begin* (milliseconds).

        Annotations whose content has no category mapping are ignored.
        """
        annot_content = annot.get('content') or {}
        category = annot_content.get('category')
        if not isinstance(category, dict):
            return

        username = annot_content.get('user') or 'anon.'
        code = category.get('code', '')
        label = _text_type(category.get('label', code))
        # A random id stands in when the annotation carries no uuid.
        annot_id = _text_type(annot.get('uuid') or uuid.uuid4())

        element = etree.SubElement(elements, u"element", {
            u"id": annot_id, u"color": _text_type(self.color),
            u"author": _text_type(username),
            u"date": _text_type(annot_ts.strftime("%Y/%m/%d")),
            u"begin": _text_type(begin), u"dur": u"0"})
        etree.SubElement(element, u"title").text = _text_type(username) + u": " + label
        etree.SubElement(element, u"abstract").text = label

        tags_node = etree.SubElement(element, u"tags")
        etree.SubElement(tags_node, u"tag").text = code

        meta_element = etree.SubElement(element, u'meta')
        polemics_element = etree.SubElement(meta_element, u'polemics')
        etree.SubElement(polemics_element, u'polemic').text = code

        source = etree.SubElement(meta_element, u"source", attrib={
            "url": self.annot_url + "/" + annot_id, "mimetype": u"application/json"})
        source.text = etree.CDATA(json.dumps(annot))

    def _write_document(self, root, project):
        """Serialize *root* and send it to the REST API or write it to a file."""
        output_data = etree.tostring(root, encoding="utf-8", method="xml",
                                     pretty_print=False, xml_declaration=True)

        if self.content_file_write and self._is_http_url(self.content_file_write):
            if project is None:
                raise ValueError("Can not write to %s: the project was not loaded from the REST API"
                                 % self.content_file_write)
            # tostring() returned UTF-8 bytes; JSON needs text.
            project["ldt"] = output_data.decode("utf-8")
            project['owner'] = project['owner'].replace('%7E', '~')
            project['contents'] = [c_url.replace('%7E', '~') for c_url in project['contents']]

            self.logger.debug("write http %s", self.content_file_write)
            self.logger.debug("write http %r", self.post_param)
            self.logger.debug("write http %r", project)
            r = requests.put(self.content_file_write, data=json.dumps(project),
                             headers={'content-type': 'application/json'},
                             params=self.post_param)
            self.logger.debug("write http %r content %s", r, r.text)
            r.raise_for_status()
            return

        if self.content_file_write and os.path.exists(self.content_file_write):
            dest_file_name = self.content_file_write
        else:
            dest_file_name = self.filename

        self.logger.debug("WRITE : %s", dest_file_name)
        with open(dest_file_name, "wb") as output:
            output.write(output_data)

    def export_annotations(self):
        """Fetch the annotations and store them in the project.

        Exits the interpreter with status 1 when the project document is
        neither an ldt nor an iri file.
        """
        # TODO: analyse situation ldt or iri ? filename set or not ?
        root, project = self._load_document()
        display_content_node = None
        if root is None:
            root, ensemble_parent = self._create_document()
        else:
            ensemble_parent, display_content_node = self._find_ensemble_parent(root)

        if ensemble_parent is None:
            self.logger.error("Can not process file")
            sys.exit(1)

        if self.replace:
            self._remove_generated_ensembles(ensemble_parent, display_content_node)

        ensemble, decoupage, elements = self._select_ensemble(ensemble_parent)
        ensemble_id = ensemble.get('id', '')
        decoupage_id = decoupage.get('id', '') if decoupage is not None else None

        params = {'q': json.dumps({'filters': self.get_filter()}),
                  'results_per_page': self.batch_size}

        # Without a configured start date, the first annotation received
        # becomes the origin of the relative timecodes.
        start_date = self.start_date
        for annot in self.build_annotation_iterator(params):
            annot_ts = self.parse_date(annot['ts'])
            if start_date is None:
                start_date = annot_ts
            offset = int(round((annot_ts - start_date).total_seconds() * 1000))
            delta = self.find_delta(offset)[1]
            if delta < 0:
                # Annotations under a negative delta are skipped.
                continue
            self._append_annotation(elements, annot, annot_ts, offset - delta)

        if self.merge:
            # Existing and new elements are mixed: sort them by timecode.
            elements[:] = sorted(elements, key=lambda n: int(n.get('begin') or 0))

        # Register the decoupage in the display node, unless already there.
        if display_content_node is not None and ensemble_id and decoupage_id:
            already_displayed = any(
                dec.get('idens', '') == ensemble_id and dec.get('id', '') == decoupage_id
                for dec in display_content_node.iterchildren(tag=u"decoupage"))
            if not already_displayed:
                etree.SubElement(display_content_node, u"decoupage",
                                 attrib={'idens': ensemble_id, 'id': decoupage_id, 'tagsSelect': ''})

        self._write_document(root, project)