--- a/script/utils/export_pad.py Wed May 08 01:24:19 2013 +0200
+++ b/script/utils/export_pad.py Fri May 10 13:27:42 2013 +0200
@@ -1,23 +1,19 @@
#!/usr/bin/env python
# coding=utf-8
+from dateutil.parser import parse as parse_date
+from iri_tweet.utils import set_logging_options, set_logging, get_logger
from lxml import etree
-from iri_tweet.models import setup_database
-from optparse import OptionParser #@UnresolvedImport
-from sqlalchemy import Table, Column, BigInteger
-from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query,
- get_logger)
+from optparse import OptionParser
import anyjson
import datetime
+import functools
import httplib2
import os.path
-import re
+import requests
import sys
import time
-import uuid #@UnresolvedImport
-from dateutil.parser import parse as parse_date
-import json
-import functools
+import uuid
class EtherpadRequestException(Exception):
@@ -149,7 +145,7 @@
elif start_date and duration:
end_date = start_date + datetime.timedelta(seconds=duration)
- if start_date is None or ts is None:
+ if start_date is None or end_date is None:
abort("No start date found")
end_ts = None
@@ -187,7 +183,7 @@
if cutting_name is None:
cutting_name = "pad_%s" % pad_id
- format = options.get('format','html')
+ output_format = options.get('format','html')
ensemble_parent = None
file_type = None
@@ -248,10 +244,10 @@
etp_req = EtherpadRequest(base_url, api_key)
- rev_count = et_req.getRevisionCount(pad_id)
+ rev_count = etp_req.getRevisionCount(pad_id)
- version_range = range(1,rev_count+1, step)
+ version_range = range(1,rev_count+1, 1)
#make sure that teh last version is exported
if rev_count not in version_range:
version_range.append(rev_count)
@@ -259,12 +255,12 @@
data = None
text = ""
-
- if format == "html":
- data = etp_req.getHtml(padID=padID, rev=rev)
+
+ if output_format == "html":
+ data = etp_req.getHtml(padID=pad_id, rev=rev)
text = data.get("html", "")
else:
- data = etp_req.getText(padID=padID, rev=rev)
+ data = etp_req.getText(padID=pad_id, rev=rev)
text = data.get("text","")
pad_ts = data['timestamp']
@@ -273,9 +269,9 @@
continue
if end_ts is not None and pad_ts > end_ts:
- break
+ break
- pad_dt = datetime.fromtimestamp(float(pad_ts)/1000.0)
+ pad_dt = datetime.datetime.fromtimestamp(float(pad_ts)/1000.0)
pad_ts_rel = pad_ts - start_ts
username = None
@@ -293,7 +289,7 @@
etree.SubElement(element, u"abstract").text = unicode(text)
meta_element = etree.SubElement(element, u'meta')
- etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(padID)))
+ etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id)))
etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
# sort by tc in