src/cm/converters/pandoc_converters.py
branchpreserve_html
changeset 259 0371caf8bcc6
parent 252 0f0a79f7f213
child 261 b60ab54b6782
--- a/src/cm/converters/pandoc_converters.py	Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/converters/pandoc_converters.py	Tue Apr 20 11:14:21 2010 +0200
@@ -13,7 +13,8 @@
 from cm.utils.string_utils import to_unicode
 
 PANDOC_BIN = "pandoc"
-PANDOC_OPTIONS = " -R "
+PANDOC_OPTIONS = " --sanitize-html "
+PANDOC_OPTIONS_RAW = " -R "
 
 MARKDOWN2PDF_BIN = "markdown2pdf"
 
@@ -37,7 +38,7 @@
 _PANDOC_ENCODING = 'utf8'
 
 @memoize
-def pandoc_convert(content, from_format, to_format, full=False):
+def pandoc_convert(content, from_format, to_format, full=False, raw=False):
     """
     Convert markdown content to pdf
     
@@ -56,7 +57,7 @@
         if from_format != 'markdown':
             content = pandoc_convert(content, from_format, 'markdown', True)
         return pandoc_markdown2pdf(content)
-    return pandoc_pandoc(content, from_format, to_format, full)
+    return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc conversion if html->html
 
 def content_or_file_name(content, file_name):
     if not content and not file_name:
@@ -154,7 +155,7 @@
 # TODO: use tidy to cleanup html
 
 @memoize
-def pandoc_pandoc(content, from_format, to_format, full=False):
+def pandoc_pandoc(content, from_format, to_format, full=False, raw=False):
     """
     Convert content (should be unicode) from from_format to to_format
     (if full: includes header & co [html, latex])
@@ -189,7 +190,11 @@
     input_file.close()
     
     # pandoc arguments and command line
-    cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name) 
+    p_options = PANDOC_OPTIONS
+    if raw:
+        p_options = PANDOC_OPTIONS_RAW
+                
+    cmd_args = ' %s -o %s ' %(p_options,output_temp_name) 
     if full:
         cmd_args += ' -s '
     cmd_args += ' -f %s ' % from_format