# HG changeset patch # User raph # Date 1271754861 -7200 # Node ID 0371caf8bcc6864a0f2250933431d3a913c9b379 # Parent a79a3c91d9b591f292b08c77d5fe8ebb447e3d2b always use pandoc but in raw mode for html->html convert diff -r a79a3c91d9b5 -r 0371caf8bcc6 src/cm/converters/__init__.py --- a/src/cm/converters/__init__.py Tue Apr 20 10:47:42 2010 +0200 +++ b/src/cm/converters/__init__.py Tue Apr 20 11:14:21 2010 +0200 @@ -26,8 +26,8 @@ if format == 'html': _not_used_css, converted_input = extract_css_body(xhtml_input) #converted_input = xhtml_input - else: - converted_input = pandoc_convert(html_input, 'html', format) + + converted_input = pandoc_convert(html_input, 'html', format) ############################## # latex @@ -44,8 +44,8 @@ elif mime_type in ['text/html', 'application/xhtml+xml']: if format == 'html': converted_input = input - else: - converted_input = pandoc_convert(input, 'html', format) + + converted_input = pandoc_convert(input, 'html', format) ############################## # anything looks like text -> markdown elif mime_type in ['text/plain', diff -r a79a3c91d9b5 -r 0371caf8bcc6 src/cm/converters/pandoc_converters.py --- a/src/cm/converters/pandoc_converters.py Tue Apr 20 10:47:42 2010 +0200 +++ b/src/cm/converters/pandoc_converters.py Tue Apr 20 11:14:21 2010 +0200 @@ -13,7 +13,8 @@ from cm.utils.string_utils import to_unicode PANDOC_BIN = "pandoc" -PANDOC_OPTIONS = " -R " +PANDOC_OPTIONS = " --sanitize-html " +PANDOC_OPTIONS_RAW = " -R " MARKDOWN2PDF_BIN = "markdown2pdf" @@ -37,7 +38,7 @@ _PANDOC_ENCODING = 'utf8' @memoize -def pandoc_convert(content, from_format, to_format, full=False): +def pandoc_convert(content, from_format, to_format, full=False, raw=False): """ Convert markdown content to pdf @@ -56,7 +57,7 @@ if from_format != 'markdown': content = pandoc_convert(content, from_format, 'markdown', True) return pandoc_markdown2pdf(content) - return pandoc_pandoc(content, from_format, to_format, full) + return pandoc_pandoc(content, 
from_format, to_format, full, from_format==to_format=='html') # use raw pandoc conversion if html->html def content_or_file_name(content, file_name): if not content and not file_name: @@ -154,7 +155,7 @@ # TODO: use tidy to cleanup html @memoize -def pandoc_pandoc(content, from_format, to_format, full=False): +def pandoc_pandoc(content, from_format, to_format, full=False, raw=False): """ Convert content (should be unicode) from from_format to to_format (if full: includes header & co [html, latex]) @@ -189,7 +190,11 @@ input_file.close() # pandoc arguments and command line - cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name) + p_options = PANDOC_OPTIONS + if raw: + p_options = PANDOC_OPTIONS_RAW + + cmd_args = ' %s -o %s ' %(p_options,output_temp_name) if full: cmd_args += ' -s ' cmd_args += ' -f %s ' % from_format diff -r a79a3c91d9b5 -r 0371caf8bcc6 src/cm/models.py --- a/src/cm/models.py Tue Apr 20 10:47:42 2010 +0200 +++ b/src/cm/models.py Tue Apr 20 11:14:21 2010 +0200 @@ -189,10 +189,7 @@ objects = TextVersionManager() def get_content(self, format='html'): - if format == self.format: - return self.content - else: - return pandoc_convert(self.content, self.format, format) + return pandoc_convert(self.content, self.format, format) # def _get_comments(self, user = None, filter_reply = 0): # """ # get comments viewable by this user (user = None or user = AnonymousUser => everyone)