--- a/src/cm/converters/__init__.py Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/converters/__init__.py Tue Apr 20 11:14:21 2010 +0200
@@ -26,8 +26,8 @@
if format == 'html':
_not_used_css, converted_input = extract_css_body(xhtml_input)
#converted_input = xhtml_input
- else:
- converted_input = pandoc_convert(html_input, 'html', format)
+
+ converted_input = pandoc_convert(html_input, 'html', format)
##############################
# latex
@@ -44,8 +44,8 @@
elif mime_type in ['text/html', 'application/xhtml+xml']:
if format == 'html':
converted_input = input
- else:
- converted_input = pandoc_convert(input, 'html', format)
+
+ converted_input = pandoc_convert(input, 'html', format)
##############################
# anything looks like text -> markdown
elif mime_type in ['text/plain',
--- a/src/cm/converters/pandoc_converters.py Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/converters/pandoc_converters.py Tue Apr 20 11:14:21 2010 +0200
@@ -13,7 +13,8 @@
from cm.utils.string_utils import to_unicode
PANDOC_BIN = "pandoc"
-PANDOC_OPTIONS = " -R "
+PANDOC_OPTIONS = " --sanitize-html "
+PANDOC_OPTIONS_RAW = " -R "
MARKDOWN2PDF_BIN = "markdown2pdf"
@@ -37,7 +38,7 @@
_PANDOC_ENCODING = 'utf8'
@memoize
-def pandoc_convert(content, from_format, to_format, full=False):
+def pandoc_convert(content, from_format, to_format, full=False, raw=False):
"""
Convert markdown content to pdf
@@ -56,7 +57,7 @@
if from_format != 'markdown':
content = pandoc_convert(content, from_format, 'markdown', True)
return pandoc_markdown2pdf(content)
- return pandoc_pandoc(content, from_format, to_format, full)
+ return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc conversion if html->html
def content_or_file_name(content, file_name):
if not content and not file_name:
@@ -154,7 +155,7 @@
# TODO: use tidy to cleanup html
@memoize
-def pandoc_pandoc(content, from_format, to_format, full=False):
+def pandoc_pandoc(content, from_format, to_format, full=False, raw=False):
"""
Convert content (should be unicode) from from_format to to_format
(if full: includes header & co [html, latex])
@@ -189,7 +190,11 @@
input_file.close()
# pandoc arguments and command line
- cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name)
+ p_options = PANDOC_OPTIONS
+ if raw:
+ p_options = PANDOC_OPTIONS_RAW
+
+ cmd_args = ' %s -o %s ' %(p_options,output_temp_name)
if full:
cmd_args += ' -s '
cmd_args += ' -f %s ' % from_format
--- a/src/cm/models.py Tue Apr 20 10:47:42 2010 +0200
+++ b/src/cm/models.py Tue Apr 20 11:14:21 2010 +0200
@@ -189,10 +189,7 @@
objects = TextVersionManager()
def get_content(self, format='html'):
- if format == self.format:
- return self.content
- else:
- return pandoc_convert(self.content, self.format, format)
+ return pandoc_convert(self.content, self.format, format)
# def _get_comments(self, user = None, filter_reply = 0):
# """
# get comments viewable by this user (user = None or user = AnonymousUser => everyone)