--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/script/utils/export_pad.py Mon Oct 15 17:01:50 2012 +0200
@@ -0,0 +1,324 @@
+#!/usr/bin/env python
+# coding=utf-8
+
+from lxml import etree
+from iri_tweet.models import setup_database
+from optparse import OptionParser #@UnresolvedImport
+from sqlalchemy import Table, Column, BigInteger
+from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query,
+ get_logger)
+import anyjson
+import datetime
+import httplib2
+import os.path
+import re
+import sys
+import time
+import uuid #@UnresolvedImport
+from dateutil.parser import parse as parse_date
+import json
+import functools
+
+
+class EtherpadRequestException(Exception):
+    """Error raised when an etherpad-lite API call reports a failure.
+
+    Attributes:
+        status: the ``status`` entry of the response when it has one,
+            otherwise its ``code`` entry; ``None`` when neither is present.
+        original_resp: the decoded response dictionary, kept so callers can
+            inspect the full payload.
+    """
+
+    def __init__(self, original_resp):
+        # Read the payload with .get(): a missing key must not raise KeyError
+        # here, otherwise the real API failure is hidden by a secondary error.
+        message = original_resp.get("message", "unknown etherpad error")
+        super(EtherpadRequestException, self).__init__(message)
+        self.status = original_resp.get("status", original_resp.get("code"))
+        self.original_resp = original_resp
+
+
+class EtherpadRequest(object):
+    """Thin client for an etherpad-lite HTTP API.
+
+    Any public attribute that is not defined on the class is turned into a
+    remote call: ``req.getText(padID="x", rev=3)`` requests
+    ``<base_url>/getText`` with the keyword arguments (plus the api key) as
+    query parameters and returns the ``data`` member of the JSON answer.
+    """
+
+    def __init__(self, base_url, api_key):
+        self.base_url = base_url
+        self.api_key = api_key
+        # httplib2 client, created on first use and then reused.
+        self.__request = None
+
+    def __getattr__(self, name):
+        """Return a callable that performs the API function called *name*.
+
+        Private and dunder names raise AttributeError instead, so protocol
+        lookups (copy, pickle, ...) are not mistaken for API functions.
+        """
+        if name.startswith("_"):
+            raise AttributeError(name)
+        return functools.partial(self.__action, name)
+
+    def __action(self, action, **kwargs):
+        """Call the API function *action* and return its ``data`` payload.
+
+        Raises:
+            EtherpadRequestException: when the response ``code`` is not 0.
+        """
+        # Imported here because the module location differs between
+        # Python 2 and Python 3.
+        try:
+            from urllib import urlencode
+        except ImportError:
+            from urllib.parse import urlencode
+
+        params = dict(kwargs)
+        params['apikey'] = self.api_key
+        url = "%s/%s?%s" % (self.base_url, action, urlencode(params))
+
+        if self.__request is None:
+            self.__request = httplib2.Http()
+        _http_resp, content = self.__request.request(url)
+
+        resp = anyjson.deserialize(content)
+
+        if resp["code"] != 0:
+            raise EtherpadRequestException(resp)
+        return resp["data"]
+
+    def getRevisionsCount(self, padID):
+        """Return the ``revisions`` value reported for pad *padID*."""
+        res = self.__action("getRevisionsCount", padID=padID)
+        return res["revisions"]
+
+    def getPadUrl(self, padID):
+        """Return ``<base_url>/<padID>``.
+
+        NOTE(review): base_url is the API url given on the command line;
+        confirm that pads are really reachable under that same prefix.
+        """
+        return "%s/%s" % (self.base_url, padID)
+
+
+
+def abort(message, parser=None):
+    """Report a fatal usage error and exit the process with status 1.
+
+    Args:
+        message: text written to stderr (a newline is appended); nothing is
+            written when it is None.
+        parser: optional option parser whose help text is printed before
+            exiting. It is optional so that callers passing only a message
+            still terminate cleanly instead of failing with a TypeError.
+
+    Raises:
+        SystemExit: always, with exit code 1.
+    """
+    if message is not None:
+        sys.stderr.write(message + "\n")
+    if parser is not None:
+        parser.print_help()
+    sys.exit(1)
+
+def get_options():
+    """Build the command-line parser and parse ``sys.argv``.
+
+    Returns:
+        A ``(options, args, parser)`` tuple: the two values returned by
+        ``OptionParser.parse_args()`` followed by the parser itself, so the
+        caller can print the help text when validation fails.
+    """
+    parser = OptionParser()
+    parser.add_option("-u", "--api-url", dest="api_url",
+                      help="Base etherpad-lite api url", metavar="API_URL", default=None)
+    parser.add_option("-k", "--api-key", dest="api_key",
+                      help="etherpad-lite api key", metavar="API_KEY", default=None)
+    parser.add_option("-p", "--pad-id", dest="pad_id",
+                      help="pad id", metavar="PADID")
+    parser.add_option("-s", "--start-date", dest="start_date",
+                      help="start date", metavar="START_DATE", default=None)
+    parser.add_option("-e", "--end-date", dest="end_date",
+                      help="end date", metavar="END_DATE", default=None)
+    # optparse spells this keyword "choices"; "choice" is rejected as soon as
+    # the option is declared.
+    parser.add_option("-f", "--format", dest="format", type="choice",
+                      help="format", metavar="FORMAT", choices=['html', 'text'], default='html')
+    parser.add_option("-I", "--content-file", dest="content_file",
+                      help="Content file", metavar="CONTENT_FILE")
+    parser.add_option("-C", "--color", dest="color",
+                      help="Color code", metavar="COLOR", default="16763904")
+    parser.add_option("-D", "--duration", dest="duration", type="int",
+                      help="Duration", metavar="DURATION", default=None)
+    parser.add_option("-n", "--name", dest="name",
+                      help="Cutting name", metavar="NAME", default=u"pads")
+    parser.add_option("-R", "--replace", dest="replace", action="store_true",
+                      help="Replace tweet ensemble", default=False)
+    parser.add_option("-m", "--merge", dest="merge", action="store_true",
+                      help="merge tweet ensemble, choose the first ensemble", default=False)
+    parser.add_option("-E", "--extended", dest="extended_mode", action="store_true",
+                      help="Trigger polemic extended mode", default=False)
+    # The step is used as a range() increment, so it must be an integer and
+    # default to 1 (every revision).
+    parser.add_option("-S", "--step", dest="step", type="int",
+                      help="step for version", metavar="STEP", default=1)
+
+    # Helper imported from iri_tweet.utils; presumably registers the
+    # logging-related options on the parser.
+    set_logging_options(parser)
+
+    return parser.parse_args() + (parser,)
+
+
+def _to_timestamp_ms(date):
+    """Return *date* in milliseconds since the epoch, read as local time."""
+    return time.mktime(date.timetuple()) * 1000
+
+
+def main():
+    """Export the revisions of an etherpad-lite pad into an ldt/iri file.
+
+    Steps:
+      1. parse and validate the command-line options;
+      2. load the content file, either from disk or from a url returning a
+         JSON project whose "ldt" member holds the XML;
+      3. locate (or create) the node that holds the ensembles;
+      4. add one <element> per exported pad revision whose timestamp falls
+         inside the requested time window;
+      5. write the document back where it was read from.
+    """
+    (options, _args, parser) = get_options()
+
+    set_logging(options)
+    get_logger().debug("OPTIONS : " + repr(options)) #@UndefinedVariable
+
+    if len(sys.argv) == 1:
+        abort(None, parser)
+
+    # optparse stores the parsed values as attributes; the returned object
+    # has no dict-like get() method.
+    base_url = options.api_url
+    if not base_url:
+        abort("No base url", parser)
+
+    api_key = options.api_key
+    if not api_key:
+        abort("api key missing", parser)
+
+    pad_id = options.pad_id
+    if not pad_id:
+        abort("No pad id", parser)
+
+    start_date = parse_date(options.start_date) if options.start_date else None
+    if start_date is None:
+        abort("No start date found", parser)
+    start_ts = _to_timestamp_ms(start_date)
+
+    # An explicit end date wins over a duration counted from the start date.
+    end_date = None
+    if options.end_date:
+        end_date = parse_date(options.end_date)
+    elif options.duration:
+        end_date = start_date + datetime.timedelta(seconds=options.duration)
+    end_ts = _to_timestamp_ms(end_date) if end_date is not None else None
+
+    content_file = options.content_file
+    if not content_file:
+        abort("No content file", parser)
+    is_remote = content_file.startswith("http")
+
+    root = None
+    project = None
+
+    if is_remote:
+        get_logger().debug("url : " + content_file) #@UndefinedVariable
+
+        h = httplib2.Http()
+        resp, content = h.request(content_file)
+
+        get_logger().debug("url response " + repr(resp) + " content " + repr(content)) #@UndefinedVariable
+
+        project = anyjson.deserialize(content)
+        root = etree.fromstring(project["ldt"])
+    elif os.path.exists(content_file):
+        root = etree.parse(content_file).getroot()
+
+    if root is None:
+        abort("No content file, file not found", parser)
+
+    cutting_name = options.name
+    if cutting_name is None:
+        cutting_name = "pad_%s" % pad_id
+
+    output_format = options.format
+    # Guard against a zero or negative step, which range() cannot use here.
+    step = options.step if options.step and options.step > 0 else 1
+
+    # The first child named "project" or "head" tells which format this is.
+    file_type = None
+    for node in root:
+        if node.tag == "project":
+            file_type = "ldt"
+            break
+        elif node.tag == "head":
+            file_type = "iri"
+            break
+    if file_type is None:
+        abort("Unknown file type", parser)
+
+    ensemble_parent = None
+    if file_type == "ldt":
+        media_nodes = root.xpath("//media")
+        media = media_nodes[0] if len(media_nodes) > 0 else None
+        annotations_node = root.find(u"annotations")
+        if annotations_node is None:
+            annotations_node = etree.SubElement(root, u"annotations")
+        content_node = annotations_node.find(u"content")
+        # A new <content> node needs the id of a media; without any media
+        # the parent stays unset and the script aborts just below.
+        if content_node is None and media is not None:
+            content_node = etree.SubElement(annotations_node, u"content", id=media.get(u"id"))
+        ensemble_parent = content_node
+    elif file_type == "iri":
+        body_node = root.find(u"body")
+        if body_node is None:
+            body_node = etree.SubElement(root, u"body")
+        ensembles_node = body_node.find(u"ensembles")
+        if ensembles_node is None:
+            ensembles_node = etree.SubElement(body_node, u"ensembles")
+        ensemble_parent = ensembles_node
+
+    if ensemble_parent is None:
+        abort("Can not add cutting", parser)
+
+    if options.replace:
+        # Iterate over a snapshot: removing children during a live
+        # iteration can skip siblings.
+        for ens in list(ensemble_parent.iterchildren(tag=u"ensemble")):
+            if ens.get("id", "").startswith(cutting_name):
+                ensemble_parent.remove(ens)
+
+    ensemble = None
+    elements = None
+
+    if options.merge:
+        ensemble = ensemble_parent.find(u"ensemble")
+        if ensemble is not None:
+            elements = ensemble.find(u".//elements")
+
+    if ensemble is None or elements is None:
+        # The id starts with the cutting name so that a later run with
+        # --replace (which matches on that prefix) finds this ensemble.
+        ensemble_id = u"%s_%s" % (unicode(cutting_name), unicode(uuid.uuid4()))
+        ensemble = etree.SubElement(ensemble_parent, u"ensemble", {u"id": ensemble_id, u"title": u"Ensemble pad", u"author": u"IRI Web", u"abstract": u"Ensemble Pad"})
+        decoupage = etree.SubElement(ensemble, u"decoupage", {u"id": unicode(uuid.uuid4()), u"author": u"IRI Web"})
+
+        etree.SubElement(decoupage, u"title").text = unicode(cutting_name)
+        etree.SubElement(decoupage, u"abstract").text = unicode(cutting_name)
+
+        elements = etree.SubElement(decoupage, u"elements")
+
+    etp_req = EtherpadRequest(base_url, api_key)
+    rev_count = etp_req.getRevisionsCount(pad_id)
+
+    version_range = list(range(1, rev_count + 1, step))
+    # make sure that the last version is exported
+    if not version_range or version_range[-1] != rev_count:
+        version_range.append(rev_count)
+
+    for rev in version_range:
+
+        # NOTE(review): the code relies on 'timestamp' and 'author' entries
+        # in the answers of getHtml/getText - confirm that the target
+        # server provides them and spells the function names this way.
+        if output_format == "html":
+            data = etp_req.getHtml(padID=pad_id, rev=rev)
+            text = data.get("html", "")
+        else:
+            data = etp_req.getText(padID=pad_id, rev=rev)
+            text = data.get("text", "")
+
+        pad_ts = data['timestamp']
+
+        if pad_ts < start_ts:
+            continue
+
+        if end_ts is not None and pad_ts > end_ts:
+            break
+
+        pad_dt = datetime.datetime.fromtimestamp(float(pad_ts) / 1000.0)
+        # Whole milliseconds from the start date; kept integral so that the
+        # "begin" attribute can be parsed back when sorting.
+        pad_ts_rel = int(pad_ts - start_ts)
+
+        author = data.get('author') or {}
+        username = author.get('name') or author.get('id') or "anon."
+        # Fall back on the --color option when the author has no colour.
+        color = author.get('color') or options.color or ""
+
+        element = etree.SubElement(elements, u"element", {u"id": "%s-%s-%d" % (unicode(uuid.uuid4()), unicode(pad_id), rev), u"color": unicode(color), u"author": unicode(username), u"date": unicode(pad_dt.strftime("%Y/%m/%d")), u"begin": unicode(pad_ts_rel), u"dur": u"0", u"src": ""})
+        etree.SubElement(element, u"title").text = "%s: %s - rev %d" % (unicode(username), unicode(pad_id), rev)
+        etree.SubElement(element, u"abstract").text = unicode(text)
+
+        meta_element = etree.SubElement(element, u'meta')
+        etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id)))
+        etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
+
+    # sort by tc in
+    if options.merge:
+        # Go through float() so that values such as "1500.0" are accepted.
+        elements[:] = sorted(elements, key=lambda n: int(float(n.get('begin', 0))))
+
+    output_data = etree.tostring(root, encoding="utf-8", method="xml", pretty_print=False, xml_declaration=True)
+
+    if is_remote:
+        project["ldt"] = output_data
+        body = anyjson.serialize(project)
+        h = httplib2.Http()
+        resp, content = h.request(content_file, "PUT", headers={'content-type':'application/json'}, body=body)
+        if resp.status != 200:
+            raise Exception("Error writing content : %d : %s"%(resp.status, resp.reason))
+    else:
+        # root was parsed from content_file, so the file exists and is
+        # overwritten in place; output_data is already utf-8 encoded.
+        with open(content_file, "wb") as output:
+            output.write(output_data)
+
+
+if __name__ == "__main__":
+    main()
+
+