always use pandoc but in raw mode for html->html convert [branch: preserve_html]
author raph
Tue, 20 Apr 2010 11:14:21 +0200
branch preserve_html
changeset 259 0371caf8bcc6
parent 258 a79a3c91d9b5
child 260 9075dc2fb93c
always use pandoc but in raw mode for html->html convert
src/cm/converters/__init__.py
src/cm/converters/pandoc_converters.py
src/cm/models.py
--- a/src/cm/converters/__init__.py	Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/converters/__init__.py	Tue Apr 20 11:14:21 2010 +0200
@@ -26,8 +26,8 @@
         if format == 'html':
                 _not_used_css, converted_input = extract_css_body(xhtml_input)
                 #converted_input = xhtml_input
-        else:
-            converted_input = pandoc_convert(html_input, 'html', format)
+        
+        converted_input = pandoc_convert(html_input, 'html', format)
         
     ##############################
     # latex
@@ -44,8 +44,8 @@
     elif mime_type in ['text/html', 'application/xhtml+xml']:
         if format == 'html':
             converted_input = input
-        else:
-            converted_input = pandoc_convert(input, 'html', format)
+        
+        converted_input = pandoc_convert(input, 'html', format)
     ##############################
     # anything looks like text -> markdown
     elif mime_type in ['text/plain',
--- a/src/cm/converters/pandoc_converters.py	Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/converters/pandoc_converters.py	Tue Apr 20 11:14:21 2010 +0200
@@ -13,7 +13,8 @@
 from cm.utils.string_utils import to_unicode
 
 PANDOC_BIN = "pandoc"
-PANDOC_OPTIONS = " -R "
+PANDOC_OPTIONS = " --sanitize-html "
+PANDOC_OPTIONS_RAW = " -R "
 
 MARKDOWN2PDF_BIN = "markdown2pdf"
 
@@ -37,7 +38,7 @@
 _PANDOC_ENCODING = 'utf8'
 
 @memoize
-def pandoc_convert(content, from_format, to_format, full=False):
+def pandoc_convert(content, from_format, to_format, full=False, raw=False):
     """
     Convert markdown content to pdf
     
@@ -56,7 +57,7 @@
         if from_format != 'markdown':
             content = pandoc_convert(content, from_format, 'markdown', True)
         return pandoc_markdown2pdf(content)
-    return pandoc_pandoc(content, from_format, to_format, full)
+    return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc convertion if html->html
 
 def content_or_file_name(content, file_name):
     if not content and not file_name:
@@ -154,7 +155,7 @@
 # TODO: use tidy to cleanup html
 
 @memoize
-def pandoc_pandoc(content, from_format, to_format, full=False):
+def pandoc_pandoc(content, from_format, to_format, full=False, raw=False):
     """
     Convert content (should be unicode) from from_format to to_format
     (if full: includes header & co [html, latex])
@@ -189,7 +190,11 @@
     input_file.close()
     
     # pandoc arguments and command line
-    cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name) 
+    p_options = PANDOC_OPTIONS
+    if raw:
+        p_options = PANDOC_OPTIONS_RAW
+                
+    cmd_args = ' %s -o %s ' %(p_options,output_temp_name) 
     if full:
         cmd_args += ' -s '
     cmd_args += ' -f %s ' % from_format
--- a/src/cm/models.py	Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/models.py	Tue Apr 20 11:14:21 2010 +0200
@@ -189,10 +189,7 @@
     objects = TextVersionManager()
     
     def get_content(self, format='html'):
-        if format == self.format:
-            return self.content
-        else:
-            return pandoc_convert(self.content, self.format, format)
+        return pandoc_convert(self.content, self.format, format)
 #    def _get_comments(self, user = None, filter_reply = 0):        
 #        """
 #        get comments viewable by this user (user = None or user = AnonymousUser => everyone)