diff -r 9c57883dbb9d -r 8628c590f608 script/utils/export_pad.py --- a/script/utils/export_pad.py Wed May 08 01:24:19 2013 +0200 +++ b/script/utils/export_pad.py Fri May 10 13:27:42 2013 +0200 @@ -1,23 +1,19 @@ #!/usr/bin/env python # coding=utf-8 +from dateutil.parser import parse as parse_date +from iri_tweet.utils import set_logging_options, set_logging, get_logger from lxml import etree -from iri_tweet.models import setup_database -from optparse import OptionParser #@UnresolvedImport -from sqlalchemy import Table, Column, BigInteger -from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, - get_logger) +from optparse import OptionParser import anyjson import datetime +import functools import httplib2 import os.path -import re +import requests import sys import time -import uuid #@UnresolvedImport -from dateutil.parser import parse as parse_date -import json -import functools +import uuid class EtherpadRequestException(Exception): @@ -149,7 +145,7 @@ elif start_date and duration: end_date = start_date + datetime.timedelta(seconds=duration) - if start_date is None or ts is None: + if start_date is None or end_date is None: abort("No start date found") end_ts = None @@ -187,7 +183,7 @@ if cutting_name is None: cutting_name = "pad_%s" % pad_id - format = options.get('format','html') + output_format = options.get('format','html') ensemble_parent = None file_type = None @@ -248,10 +244,10 @@ etp_req = EtherpadRequest(base_url, api_key) - rev_count = et_req.getRevisionCount(pad_id) + rev_count = etp_req.getRevisionCount(pad_id) - version_range = range(1,rev_count+1, step) + version_range = range(1,rev_count+1, 1) #make sure that teh last version is exported if rev_count not in version_range: version_range.append(rev_count) @@ -259,12 +255,12 @@ data = None text = "" - - if format == "html": - data = etp_req.getHtml(padID=padID, rev=rev) + + if output_format == "html": + data = etp_req.getHtml(padID=pad_id, rev=rev) text = data.get("html", "") else: - data = etp_req.getText(padID=padID, rev=rev) + data = etp_req.getText(padID=pad_id, rev=rev) text = data.get("text","") pad_ts = data['timestamp'] @@ -273,9 +269,9 @@ continue if end_ts is not None and pad_ts > end_ts: - break + break - pad_dt = datetime.fromtimestamp(float(pad_ts)/1000.0) + pad_dt = datetime.datetime.fromtimestamp(float(pad_ts)/1000.0) pad_ts_rel = pad_ts - start_ts username = None @@ -293,7 +289,7 @@ etree.SubElement(element, u"abstract").text = unicode(text) meta_element = etree.SubElement(element, u'meta') - etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(padID))) + etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id))) etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev)) # sort by tc in