script/utils/export_pad.py
changeset 891 8628c590f608
parent 693 2ef837069108
--- a/script/utils/export_pad.py	Wed May 08 01:24:19 2013 +0200
+++ b/script/utils/export_pad.py	Fri May 10 13:27:42 2013 +0200
@@ -1,23 +1,19 @@
 #!/usr/bin/env python
 # coding=utf-8
 
+from dateutil.parser import parse as parse_date
+from iri_tweet.utils import set_logging_options, set_logging, get_logger
 from lxml import etree
-from iri_tweet.models import setup_database
-from optparse import OptionParser #@UnresolvedImport
-from sqlalchemy import Table, Column, BigInteger
-from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, 
-    get_logger)
+from optparse import OptionParser
 import anyjson
 import datetime
+import functools
 import httplib2
 import os.path
-import re
+import requests
 import sys
 import time
-import uuid #@UnresolvedImport
-from dateutil.parser import parse as parse_date
-import json
-import functools
+import uuid
 
 
 class EtherpadRequestException(Exception):
@@ -149,7 +145,7 @@
     elif start_date and duration:
         end_date = start_date + datetime.timedelta(seconds=duration)
         
-    if start_date is None or ts is None:
+    if start_date is None or end_date is None:
         abort("No start date found")
 
     end_ts = None
@@ -187,7 +183,7 @@
     if cutting_name is None:
         cutting_name = "pad_%s" % pad_id
 
-    format = options.get('format','html')
+    output_format = options.get('format','html')
     ensemble_parent = None
                 
     file_type = None
@@ -248,10 +244,10 @@
 
 
     etp_req = EtherpadRequest(base_url, api_key)
-    rev_count = et_req.getRevisionCount(pad_id)
+    rev_count = etp_req.getRevisionCount(pad_id)
     
     
-    version_range = range(1,rev_count+1, step)
+    version_range = range(1,rev_count+1, 1)
     #make sure that the last version is exported
     if rev_count not in version_range:
         version_range.append(rev_count)
@@ -259,12 +255,12 @@
         
         data = None
         text = ""
-
-        if format == "html":
-            data = etp_req.getHtml(padID=padID, rev=rev)
+        
+        if output_format == "html":
+            data = etp_req.getHtml(padID=pad_id, rev=rev)
             text = data.get("html", "")
         else:
-            data = etp_req.getText(padID=padID, rev=rev)
+            data = etp_req.getText(padID=pad_id, rev=rev)
             text = data.get("text","")
 
         pad_ts = data['timestamp']
@@ -273,9 +269,9 @@
             continue
         
         if end_ts is not None and pad_ts > end_ts:
-             break
+            break
 
-        pad_dt = datetime.fromtimestamp(float(pad_ts)/1000.0)
+        pad_dt = datetime.datetime.fromtimestamp(float(pad_ts)/1000.0)
         pad_ts_rel = pad_ts - start_ts
         
         username = None
@@ -293,7 +289,7 @@
         etree.SubElement(element, u"abstract").text = unicode(text)
         
         meta_element = etree.SubElement(element, u'meta')
-        etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(padID)))
+        etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id)))
         etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
 
     # sort by tc in