#!/usr/bin/env python
# coding=utf-8
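
# Export the revision history of an etherpad-lite pad as a cutting
# (decoupage) into an IRI/LDT project, read either from a URL or from a
# local file. Each sampled revision becomes one <element> positioned
# relative to the given start date.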
|
from lxml import etree
from iri_tweet.models import setup_database
from optparse import OptionParser #@UnresolvedImport
from sqlalchemy import Table, Column, BigInteger
from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query,
                             get_logger)
import anyjson
import datetime
import httplib2
import os.path
import re
import requests
import sys
import time
import uuid #@UnresolvedImport
from dateutil.parser import parse as parse_date
import json
import functools

|
class EtherpadRequestException(Exception):
    def __init__(self, original_resp):
        super(EtherpadRequestException, self).__init__(original_resp["message"])
        self.status = original_resp["status"]
        self.original_resp = original_resp

|
class EtherpadRequest(object):
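    """Minimal client for the etherpad-lite HTTP API.

    Attribute lookups that do not match a defined method are turned into
    API calls of the same name through __getattr__/__action, e.g.
    getText(padID=..., rev=...).
    """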
|
    def __init__(self, base_url, api_key):
        self.base_url = base_url
        self.api_key = api_key
        self.__request = None

    def __getattr__(self, name):
        return functools.partial(self.__action, name)

    def __action(self, action, **kwargs):
        url = "%s/%s" % (self.base_url, action)
        params = dict(kwargs)
        params['apikey'] = self.api_key

        r = requests.get(url, params=params)

        resp = anyjson.deserialize(r.text)

        if resp["code"] == 0:
            return resp["data"]
        else:
            raise EtherpadRequestException(resp)
|
    def getRevisionsCount(self, padID):
        f = self.__getattr__("getRevisionsCount")
        res = f(padID=padID)

        return res["revisions"]

    def getPadUrl(self, padID):
        return "%s/%s" % (self.base_url, padID)

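# Example usage (hypothetical values):
#   req = EtherpadRequest("http://pads.example.org/api/1.2.1", "EXAMPLEAPIKEY")
#   req.getRevisionsCount("mypad")          # -> number of revisions
#   req.getText(padID="mypad", rev=3)       # -> {"text": ...}
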
|
def abort(message, parser):
    if message is not None:
        sys.stderr.write(message + "\n")
    parser.print_help()
    sys.exit(1)

|
def get_options():

    parser = OptionParser()
    parser.add_option("-u", "--api-url", dest="api_url",
                      help="Base etherpad-lite api url", metavar="API_URL", default=None)
    parser.add_option("-k", "--api-key", dest="api_key",
                      help="Etherpad-lite api key", metavar="API_KEY", default=None)
    parser.add_option("-p", "--pad-id", dest="pad_id",
                      help="pad id", metavar="PADID")
    parser.add_option("-s", "--start-date", dest="start_date",
                      help="start date", metavar="START_DATE", default=None)
    parser.add_option("-e", "--end-date", dest="end_date",
                      help="end date", metavar="END_DATE", default=None)
    parser.add_option("-f", "--format", dest="format", type="choice",
                      help="format", metavar="FORMAT", choices=['html', 'text'], default='html')
    parser.add_option("-I", "--content-file", dest="content_file",
                      help="Content file", metavar="CONTENT_FILE")
    parser.add_option("-C", "--color", dest="color",
                      help="Color code", metavar="COLOR", default="16763904")
    parser.add_option("-D", "--duration", dest="duration", type="int",
                      help="Duration", metavar="DURATION", default=None)
    parser.add_option("-n", "--name", dest="name",
                      help="Cutting name", metavar="NAME", default=u"pads")
    parser.add_option("-R", "--replace", dest="replace", action="store_true",
                      help="Replace tweet ensemble", metavar="REPLACE", default=False)
    parser.add_option("-m", "--merge", dest="merge", action="store_true",
                      help="merge tweet ensemble, choose the first ensemble", metavar="MERGE", default=False)
    parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
                      help="Trigger polemic extended mode", metavar="EXTENDED", default=False)
    parser.add_option("-S", "--step", dest="step", type="int",
                      help="step for version", metavar="STEP", default=1)

    set_logging_options(parser)

    return parser.parse_args() + (parser,)

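
# Example invocation (script name and values are hypothetical):
#   python export_pad_ldt.py -u http://pads.example.org/api/1.2.1 -k EXAMPLEAPIKEY \
#       -p mypad -s "2012-05-10T14:00:00" -D 3600 -f text -I project.ldt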
|
if __name__ == "__main__":

    (options, args, parser) = get_options()

    set_logging(options)
    get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable

    if len(sys.argv) == 1:
        abort(None, parser)
|
    base_url = getattr(options, "api_url", None)
    if not base_url:
        abort("No base url", parser)

    api_key = getattr(options, "api_key", None)
    if not api_key:
        abort("api key missing", parser)

    pad_id = getattr(options, "pad_id", None)
    if not pad_id:
        abort("No pad id", parser)

    start_date_str = getattr(options, "start_date", None)
    end_date_str = getattr(options, "end_date", None)
    duration = getattr(options, "duration", None)
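
    # Pad revision timestamps are in milliseconds, so the start/end dates
    # are converted to millisecond timestamps before comparison.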
|
    start_date = None
    start_ts = None
    if start_date_str:
        start_date = parse_date(start_date_str)
        start_ts = time.mktime(start_date.timetuple()) * 1000

    end_date = None
    if end_date_str:
        end_date = parse_date(end_date_str)
    elif start_date and duration:
        end_date = start_date + datetime.timedelta(seconds=duration)

    if start_date is None or start_ts is None:
        abort("No start date found", parser)

    end_ts = None
    if end_date is not None:
        end_ts = time.mktime(end_date.timetuple()) * 1000
|
    content_file = getattr(options, "content_file", None)

    if not content_file:
        abort("No content file", parser)

    root = None
|
    if content_file.find("http") == 0:

        get_logger().debug("url : " + content_file) #@UndefinedVariable

        h = httplib2.Http()
        resp, content = h.request(content_file)

        get_logger().debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable

        project = anyjson.deserialize(content)
        root = etree.fromstring(project["ldt"])

    elif os.path.exists(content_file):
        doc = etree.parse(content_file)
        root = doc.getroot()

    if root is None:
        abort("No content file, file not found", parser)
|
    cutting_name = getattr(options, "name", None)
    if cutting_name is None:
        cutting_name = "pad_%s" % pad_id

    format = getattr(options, "format", "html")
    ensemble_parent = None

    file_type = None
    for node in root:
        if node.tag == "project":
            file_type = "ldt"
            break
        elif node.tag == "head":
            file_type = "iri"
            break
    if file_type is None:
        abort("Unknown file type", parser)
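
    # The parent node for the new ensemble depends on the file flavour:
    # "ldt" projects store cuttings under annotations/content, plain "iri"
    # files under body/ensembles.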
|
    if file_type == "ldt":
        media_nodes = root.xpath("//media")
        if len(media_nodes) > 0:
            media = media_nodes[0]
            annotations_node = root.find(u"annotations")
            if annotations_node is None:
                annotations_node = etree.SubElement(root, u"annotations")
            content_node = annotations_node.find(u"content")
            if content_node is None:
                content_node = etree.SubElement(annotations_node, u"content", id=media.get(u"id"))
            ensemble_parent = content_node
    elif file_type == "iri":
        body_node = root.find(u"body")
        if body_node is None:
            body_node = etree.SubElement(root, u"body")
        ensembles_node = body_node.find(u"ensembles")
        if ensembles_node is None:
            ensembles_node = etree.SubElement(body_node, u"ensembles")
        ensemble_parent = ensembles_node

    if ensemble_parent is None:
        abort("Can not add cutting", parser)
|
    if options.replace:
        for ens in ensemble_parent.iterchildren(tag=u"ensemble"):
            if ens.get("id", "").startswith(cutting_name):
                ensemble_parent.remove(ens)

    ensemble = None
    elements = None

    if options.merge:
        ensemble = ensemble_parent.find(u"ensemble")
        if ensemble is not None:
            elements = ensemble.find(u".//elements")

    if ensemble is None or elements is None:
        ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id": u"tweet_" + unicode(uuid.uuid4()), u"title": u"Ensemble pad", u"author": u"IRI Web", u"abstract": u"Ensemble Pad"})
        decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})

        etree.SubElement(decoupage, u"title").text = unicode(cutting_name)
        etree.SubElement(decoupage, u"abstract").text = unicode(cutting_name)

        elements = etree.SubElement(decoupage, u"elements")
|
    etp_req = EtherpadRequest(base_url, api_key)
    rev_count = etp_req.getRevisionsCount(pad_id)

    step = options.step

    version_range = range(1, rev_count + 1, step)
    # make sure that the last version is exported
    if rev_count not in version_range:
        version_range.append(rev_count)
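
    # One <element> is emitted per sampled revision; "begin" is the revision
    # timestamp relative to the start date, in milliseconds.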
|
    for rev in version_range:

        data = None
        text = ""

        if format == "html":
            data = etp_req.getHtml(padID=pad_id, rev=rev)
            text = data.get("html", "")
        else:
            data = etp_req.getText(padID=pad_id, rev=rev)
            text = data.get("text", "")

        pad_ts = data['timestamp']

        if pad_ts < start_ts:
            continue

        if end_ts is not None and pad_ts > end_ts:
            break

        pad_dt = datetime.datetime.fromtimestamp(float(pad_ts) / 1000.0)
        pad_ts_rel = pad_ts - start_ts

        username = None
        color = ""
        if 'author' in data:
            username = data['author']['name'] if ('name' in data['author'] and data['author']['name']) else data['author']['id']
            color = data['author']['color'] if ('color' in data['author'] and data['author']['color']) else ""

        if not username:
            username = "anon."

        element = etree.SubElement(elements, u"element", {u"id": "%s-%s-%d" % (unicode(uuid.uuid4()), unicode(pad_id), rev), u"color": unicode(color), u"author": unicode(username), u"date": unicode(pad_dt.strftime("%Y/%m/%d")), u"begin": unicode(pad_ts_rel), u"dur": u"0", u"src": ""})
        etree.SubElement(element, u"title").text = "%s: %s - rev %d" % (unicode(username), unicode(pad_id), rev)
        etree.SubElement(element, u"abstract").text = unicode(text)

        meta_element = etree.SubElement(element, u'meta')
        etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id)))
        etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
|
    # sort elements by timecode when merging into an existing ensemble
    if options.merge:
        elements[:] = sorted(elements, key=lambda n: float(n.get('begin')))

    output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=False, xml_declaration=True)
|
    if content_file and content_file.find("http") == 0:

        project["ldt"] = output_data
        body = anyjson.serialize(project)
        h = httplib2.Http()
        resp, content = h.request(content_file, "PUT", headers={'content-type': 'application/json'}, body=body)
        if resp.status != 200:
            raise Exception("Error writing content : %d : %s" % (resp.status, resp.reason))
    else:
        if content_file and os.path.exists(content_file):
            dest_file_name = content_file
        else:
            dest_file_name = options.filename

        output = open(dest_file_name, "w")
        output.write(output_data)
        output.flush()
        output.close()