diff -r f62f7f0bcaa4 -r ba7e05582435 src/cm/converters/pandoc_converters.py --- a/src/cm/converters/pandoc_converters.py Thu Aug 09 11:06:46 2012 +0200 +++ b/src/cm/converters/pandoc_converters.py Thu Aug 09 13:05:11 2012 +0200 @@ -11,7 +11,7 @@ import StringIO import tidy from cm.utils.string_utils import to_unicode -from xml.dom.minidom import parseString +from BeautifulSoup import BeautifulSoup import re from distutils.version import LooseVersion @@ -221,22 +221,15 @@ if from_format==to_format=='html': # get body content stdoutdata = (content.encode('utf8')) - #stdoutdata = re.sub(r".*]*>", '', stdoutdata) - #stdoutdata = re.sub(r".*", '', stdoutdata) - # if for some reason, tidy has not guess the doctype, make xml.dom.minidom happy with HTML entities ( ) - stdoutdata = re.sub(r" ", '\xc2\xa0', stdoutdata) - dom = parseString(stdoutdata) - body = dom.getElementsByTagName("body")[0].toxml() - stdoutdata = body[body.find('>')+1:body.rfind('", r"\n\1>", stdoutdata) # do not split closing tag with following opening tag stdoutdata = re.sub(r">\n<", r"><", stdoutdata) - # nest headers tags - #stdoutdata = re.sub(r'', r'
', stdoutdata) - #stdoutdata = re.sub(r'<\/h(\d)\n>', r'
', stdoutdata) return stdoutdata cmd_args = ' %s -o %s ' %(p_options,output_temp_name) @@ -248,8 +241,6 @@ cmd_args += ' %s ' % input_temp_name cmd = PANDOC_BIN + ' ' + cmd_args - #from socommons.converters.new_conv import controlled_Popen - #controlled_Popen(cmd, stderr=file(error_temp_name,'w')) fp_error = file(error_temp_name,'w') retcode = call(cmd, shell=True, stderr=fp_error) fp_error.close()