1 #!/usr/bin/env python |
1 #!/usr/bin/env python |
2 # coding=utf-8 |
2 # coding=utf-8 |
3 |
3 |
|
4 from dateutil.parser import parse as parse_date |
|
5 from iri_tweet.utils import set_logging_options, set_logging, get_logger |
4 from lxml import etree |
6 from lxml import etree |
5 from iri_tweet.models import setup_database |
7 from optparse import OptionParser |
6 from optparse import OptionParser #@UnresolvedImport |
|
7 from sqlalchemy import Table, Column, BigInteger |
|
8 from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, |
|
9 get_logger) |
|
10 import anyjson |
8 import anyjson |
11 import datetime |
9 import datetime |
|
10 import functools |
12 import httplib2 |
11 import httplib2 |
13 import os.path |
12 import os.path |
14 import re |
13 import requests |
15 import sys |
14 import sys |
16 import time |
15 import time |
17 import uuid #@UnresolvedImport |
16 import uuid |
18 from dateutil.parser import parse as parse_date |
|
19 import json |
|
20 import functools |
|
21 |
17 |
22 |
18 |
23 class EtherpadRequestException(Exception): |
19 class EtherpadRequestException(Exception): |
24 def __init__(self, original_resp): |
20 def __init__(self, original_resp): |
25 super(EtherpadRequestException, self).__init__(original_resp["message"]) |
21 super(EtherpadRequestException, self).__init__(original_resp["message"]) |
147 if end_date_str: |
143 if end_date_str: |
148 end_date = parse_date(end_date_str) |
144 end_date = parse_date(end_date_str) |
149 elif start_date and duration: |
145 elif start_date and duration: |
150 end_date = start_date + datetime.timedelta(seconds=duration) |
146 end_date = start_date + datetime.timedelta(seconds=duration) |
151 |
147 |
152 if start_date is None or ts is None: |
148 if start_date is None or end_date is None: |
153 abort("No start date found") |
149 abort("No start date found") |
154 |
150 |
155 end_ts = None |
151 end_ts = None |
156 if end_date is not None: |
152 if end_date is not None: |
157 end_ts = time.mktime(end_date.timetuple())*1000 |
153 end_ts = time.mktime(end_date.timetuple())*1000 |
185 |
181 |
186 cutting_name = options.get("name", None) |
182 cutting_name = options.get("name", None) |
187 if cutting_name is None: |
183 if cutting_name is None: |
188 cutting_name = "pad_%s" % pad_id |
184 cutting_name = "pad_%s" % pad_id |
189 |
185 |
190 format = options.get('format','html') |
186 output_format = options.get('format','html') |
191 ensemble_parent = None |
187 ensemble_parent = None |
192 |
188 |
193 file_type = None |
189 file_type = None |
194 for node in root: |
190 for node in root: |
195 if node.tag == "project": |
191 if node.tag == "project": |
246 |
242 |
247 elements = etree.SubElement(decoupage, u"elements") |
243 elements = etree.SubElement(decoupage, u"elements") |
248 |
244 |
249 |
245 |
250 etp_req = EtherpadRequest(base_url, api_key) |
246 etp_req = EtherpadRequest(base_url, api_key) |
251 rev_count = et_req.getRevisionCount(pad_id) |
247 rev_count = etp_req.getRevisionCount(pad_id) |
252 |
248 |
253 |
249 |
254 version_range = range(1,rev_count+1, step) |
250 version_range = range(1,rev_count+1, 1) |
255 #make sure that the last version is exported |
251 #make sure that the last version is exported |
256 if rev_count not in version_range: |
252 if rev_count not in version_range: |
257 version_range.append(rev_count) |
253 version_range.append(rev_count) |
258 for rev in version_range: |
254 for rev in version_range: |
259 |
255 |
260 data = None |
256 data = None |
261 text = "" |
257 text = "" |
262 |
258 |
263 if format == "html": |
259 if output_format == "html": |
264 data = etp_req.getHtml(padID=padID, rev=rev) |
260 data = etp_req.getHtml(padID=pad_id, rev=rev) |
265 text = data.get("html", "") |
261 text = data.get("html", "") |
266 else: |
262 else: |
267 data = etp_req.getText(padID=padID, rev=rev) |
263 data = etp_req.getText(padID=pad_id, rev=rev) |
268 text = data.get("text","") |
264 text = data.get("text","") |
269 |
265 |
270 pad_ts = data['timestamp'] |
266 pad_ts = data['timestamp'] |
271 |
267 |
272 if pad_ts < start_ts: |
268 if pad_ts < start_ts: |
273 continue |
269 continue |
274 |
270 |
275 if end_ts is not None and pad_ts > end_ts: |
271 if end_ts is not None and pad_ts > end_ts: |
276 break |
272 break |
277 |
273 |
278 pad_dt = datetime.fromtimestamp(float(pad_ts)/1000.0) |
274 pad_dt = datetime.datetime.fromtimestamp(float(pad_ts)/1000.0) |
279 pad_ts_rel = pad_ts - start_ts |
275 pad_ts_rel = pad_ts - start_ts |
280 |
276 |
281 username = None |
277 username = None |
282 color = "" |
278 color = "" |
283 if 'author' in data: |
279 if 'author' in data: |
291 element = etree.SubElement(elements, u"element" , {u"id":"%s-%s-%d" %(unicode(uuid.uuid4()),unicode(pad_id),rev), u"color":unicode(color), u"author":unicode(username), u"date":unicode(pad_dt.strftime("%Y/%m/%d")), u"begin": unicode(pad_ts_rel), u"dur":u"0", u"src":""}) |
287 element = etree.SubElement(elements, u"element" , {u"id":"%s-%s-%d" %(unicode(uuid.uuid4()),unicode(pad_id),rev), u"color":unicode(color), u"author":unicode(username), u"date":unicode(pad_dt.strftime("%Y/%m/%d")), u"begin": unicode(pad_ts_rel), u"dur":u"0", u"src":""}) |
292 etree.SubElement(element, u"title").text = "%s: %s - rev %d" % (unicode(username), unicode(pad_id), rev) |
288 etree.SubElement(element, u"title").text = "%s: %s - rev %d" % (unicode(username), unicode(pad_id), rev) |
293 etree.SubElement(element, u"abstract").text = unicode(text) |
289 etree.SubElement(element, u"abstract").text = unicode(text) |
294 |
290 |
295 meta_element = etree.SubElement(element, u'meta') |
291 meta_element = etree.SubElement(element, u'meta') |
296 etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(padID))) |
292 etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id))) |
297 etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev)) |
293 etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev)) |
298 |
294 |
299 # sort by tc in |
295 # sort by tc in |
300 if options.merge : |
296 if options.merge : |
301 elements[:] = sorted(elements,key=lambda n: int(n.get('begin'))) |
297 elements[:] = sorted(elements,key=lambda n: int(n.get('begin'))) |