diff -r a79a3c91d9b5 -r 0371caf8bcc6 src/cm/converters/pandoc_converters.py --- a/src/cm/converters/pandoc_converters.py Tue Apr 20 10:47:42 2010 +0200 +++ b/src/cm/converters/pandoc_converters.py Tue Apr 20 11:14:21 2010 +0200 @@ -13,7 +13,8 @@ from cm.utils.string_utils import to_unicode PANDOC_BIN = "pandoc" -PANDOC_OPTIONS = " -R " +PANDOC_OPTIONS = " --sanitize-html " +PANDOC_OPTIONS_RAW = " -R " MARKDOWN2PDF_BIN = "markdown2pdf" @@ -37,7 +38,7 @@ _PANDOC_ENCODING = 'utf8' @memoize -def pandoc_convert(content, from_format, to_format, full=False): +def pandoc_convert(content, from_format, to_format, full=False, raw=False): """ Convert markdown content to pdf @@ -56,7 +57,7 @@ if from_format != 'markdown': content = pandoc_convert(content, from_format, 'markdown', True) return pandoc_markdown2pdf(content) - return pandoc_pandoc(content, from_format, to_format, full) + return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc convertion if html->html def content_or_file_name(content, file_name): if not content and not file_name: @@ -154,7 +155,7 @@ # TODO: use tidy to cleanup html @memoize -def pandoc_pandoc(content, from_format, to_format, full=False): +def pandoc_pandoc(content, from_format, to_format, full=False, raw=False): """ Convert content (should be unicode) from from_format to to_format (if full: includes header & co [html, latex]) @@ -189,7 +190,11 @@ input_file.close() # pandoc arguments and command line - cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name) + p_options = PANDOC_OPTIONS + if raw: + p_options = PANDOC_OPTIONS_RAW + + cmd_args = ' %s -o %s ' %(p_options,output_temp_name) if full: cmd_args += ' -s ' cmd_args += ' -f %s ' % from_format