script/utils/export_pad.py
changeset 891 8628c590f608
parent 693 2ef837069108
equal deleted inserted replaced
890:9c57883dbb9d 891:8628c590f608
     1 #!/usr/bin/env python
     1 #!/usr/bin/env python
     2 # coding=utf-8
     2 # coding=utf-8
     3 
     3 
       
     4 from dateutil.parser import parse as parse_date
       
     5 from iri_tweet.utils import set_logging_options, set_logging, get_logger
     4 from lxml import etree
     6 from lxml import etree
     5 from iri_tweet.models import setup_database
     7 from optparse import OptionParser
     6 from optparse import OptionParser #@UnresolvedImport
       
     7 from sqlalchemy import Table, Column, BigInteger
       
     8 from iri_tweet.utils import (set_logging_options, set_logging, get_filter_query, 
       
     9     get_logger)
       
    10 import anyjson
     8 import anyjson
    11 import datetime
     9 import datetime
       
    10 import functools
    12 import httplib2
    11 import httplib2
    13 import os.path
    12 import os.path
    14 import re
    13 import requests
    15 import sys
    14 import sys
    16 import time
    15 import time
    17 import uuid #@UnresolvedImport
    16 import uuid
    18 from dateutil.parser import parse as parse_date
       
    19 import json
       
    20 import functools
       
    21 
    17 
    22 
    18 
    23 class EtherpadRequestException(Exception):
    19 class EtherpadRequestException(Exception):
    24     def __init__(self, original_resp):
    20     def __init__(self, original_resp):
    25         super(EtherpadRequestException, self).__init__(original_resp["message"])
    21         super(EtherpadRequestException, self).__init__(original_resp["message"])
   147     if end_date_str:
   143     if end_date_str:
   148         end_date = parse_date(end_date_str)
   144         end_date = parse_date(end_date_str)
   149     elif start_date and duration:
   145     elif start_date and duration:
   150         end_date = start_date + datetime.timedelta(seconds=duration)
   146         end_date = start_date + datetime.timedelta(seconds=duration)
   151         
   147         
   152     if start_date is None or ts is None:
   148     if start_date is None or end_date is None:
   153         abort("No start date found")
   149         abort("No start date found")
   154 
   150 
   155     end_ts = None
   151     end_ts = None
   156     if end_date is not None:
   152     if end_date is not None:
   157         end_ts = time.mktime(end_date.timetuple())*1000
   153         end_ts = time.mktime(end_date.timetuple())*1000
   185 
   181 
   186     cutting_name = options.get("name", None)
   182     cutting_name = options.get("name", None)
   187     if cutting_name is None:
   183     if cutting_name is None:
   188         cutting_name = "pad_%s" % pad_id
   184         cutting_name = "pad_%s" % pad_id
   189 
   185 
   190     format = options.get('format','html')
   186     output_format = options.get('format','html')
   191     ensemble_parent = None
   187     ensemble_parent = None
   192                 
   188                 
   193     file_type = None
   189     file_type = None
   194     for node in root:
   190     for node in root:
   195         if node.tag == "project":
   191         if node.tag == "project":
   246     
   242     
   247         elements = etree.SubElement(decoupage, u"elements")
   243         elements = etree.SubElement(decoupage, u"elements")
   248 
   244 
   249 
   245 
   250     etp_req = EtherpadRequest(base_url, api_key)
   246     etp_req = EtherpadRequest(base_url, api_key)
   251     rev_count = et_req.getRevisionCount(pad_id)
   247     rev_count = etp_req.getRevisionCount(pad_id)
   252     
   248     
   253     
   249     
   254     version_range = range(1,rev_count+1, step)
   250     version_range = range(1,rev_count+1, 1)
   255     #make sure that teh last version is exported
   251     #make sure that teh last version is exported
   256     if rev_count not in version_range:
   252     if rev_count not in version_range:
   257         version_range.append(rev_count)
   253         version_range.append(rev_count)
   258     for rev in version_range:
   254     for rev in version_range:
   259         
   255         
   260         data = None
   256         data = None
   261         text = ""
   257         text = ""
   262 
   258         
   263         if format == "html":
   259         if output_format == "html":
   264             data = etp_req.getHtml(padID=padID, rev=rev)
   260             data = etp_req.getHtml(padID=pad_id, rev=rev)
   265             text = data.get("html", "")
   261             text = data.get("html", "")
   266         else:
   262         else:
   267             data = etp_req.getText(padID=padID, rev=rev)
   263             data = etp_req.getText(padID=pad_id, rev=rev)
   268             text = data.get("text","")
   264             text = data.get("text","")
   269 
   265 
   270         pad_ts = data['timestamp']
   266         pad_ts = data['timestamp']
   271         
   267         
   272         if pad_ts < start_ts:
   268         if pad_ts < start_ts:
   273             continue
   269             continue
   274         
   270         
   275         if end_ts is not None and pad_ts > end_ts:
   271         if end_ts is not None and pad_ts > end_ts:
   276              break
   272             break
   277 
   273 
   278         pad_dt = datetime.fromtimestamp(float(pad_ts)/1000.0)
   274         pad_dt = datetime.datetime.fromtimestamp(float(pad_ts)/1000.0)
   279         pad_ts_rel = pad_ts - start_ts
   275         pad_ts_rel = pad_ts - start_ts
   280         
   276         
   281         username = None
   277         username = None
   282         color = ""
   278         color = ""
   283         if 'author' in data:
   279         if 'author' in data:
   291         element = etree.SubElement(elements, u"element" , {u"id":"%s-%s-%d" %(unicode(uuid.uuid4()),unicode(pad_id),rev), u"color":unicode(color), u"author":unicode(username), u"date":unicode(pad_dt.strftime("%Y/%m/%d")), u"begin": unicode(pad_ts_rel), u"dur":u"0", u"src":""})
   287         element = etree.SubElement(elements, u"element" , {u"id":"%s-%s-%d" %(unicode(uuid.uuid4()),unicode(pad_id),rev), u"color":unicode(color), u"author":unicode(username), u"date":unicode(pad_dt.strftime("%Y/%m/%d")), u"begin": unicode(pad_ts_rel), u"dur":u"0", u"src":""})
   292         etree.SubElement(element, u"title").text = "%s: %s - rev %d" % (unicode(username), unicode(pad_id), rev)
   288         etree.SubElement(element, u"title").text = "%s: %s - rev %d" % (unicode(username), unicode(pad_id), rev)
   293         etree.SubElement(element, u"abstract").text = unicode(text)
   289         etree.SubElement(element, u"abstract").text = unicode(text)
   294         
   290         
   295         meta_element = etree.SubElement(element, u'meta')
   291         meta_element = etree.SubElement(element, u'meta')
   296         etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(padID)))
   292         etree.SubElement(meta_element, "pad_url").text = etree.CDATA(unicode(etp_req.getPadUrl(pad_id)))
   297         etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
   293         etree.SubElement(meta_element, "revision").text = etree.CDATA(unicode(rev))
   298 
   294 
   299     # sort by tc in
   295     # sort by tc in
   300     if options.merge :
   296     if options.merge :
   301         elements[:] = sorted(elements,key=lambda n: int(n.get('begin')))
   297         elements[:] = sorted(elements,key=lambda n: int(n.get('begin')))