src/cm/converters/pandoc_converters.py
branchpreserve_html
changeset 259 0371caf8bcc6
parent 252 0f0a79f7f213
child 261 b60ab54b6782
equal deleted inserted replaced
258:a79a3c91d9b5 259:0371caf8bcc6
    11 import StringIO
    11 import StringIO
    12 import tidy
    12 import tidy
    13 from cm.utils.string_utils import to_unicode
    13 from cm.utils.string_utils import to_unicode
    14 
    14 
    15 PANDOC_BIN = "pandoc"
    15 PANDOC_BIN = "pandoc"
    16 PANDOC_OPTIONS = " -R "
    16 PANDOC_OPTIONS = " --sanitize-html "
       
    17 PANDOC_OPTIONS_RAW = " -R "
    17 
    18 
    18 MARKDOWN2PDF_BIN = "markdown2pdf"
    19 MARKDOWN2PDF_BIN = "markdown2pdf"
    19 
    20 
    20 # make sure binaries are available
    21 # make sure binaries are available
    21 from cm.utils.system import bin_search
    22 from cm.utils.system import bin_search
    35 DEFAULT_INPUT_FORMAT = 'markdown'
    36 DEFAULT_INPUT_FORMAT = 'markdown'
    36 
    37 
    37 _PANDOC_ENCODING = 'utf8'
    38 _PANDOC_ENCODING = 'utf8'
    38 
    39 
    39 @memoize
    40 @memoize
    40 def pandoc_convert(content, from_format, to_format, full=False):
    41 def pandoc_convert(content, from_format, to_format, full=False, raw=False):
    41     """
    42     """
    42     Convert markdown content to pdf
    43     Convert markdown content to pdf
    43     
    44     
    44     >>> res = pandoc_convert('<span>dssd', 'html', 'pdf')
    45     >>> res = pandoc_convert('<span>dssd', 'html', 'pdf')
    45     """
    46     """
    54     # if to_format is pdf: use markdown2pdf
    55     # if to_format is pdf: use markdown2pdf
    55     if to_format == 'pdf':        
    56     if to_format == 'pdf':        
    56         if from_format != 'markdown':
    57         if from_format != 'markdown':
    57             content = pandoc_convert(content, from_format, 'markdown', True)
    58             content = pandoc_convert(content, from_format, 'markdown', True)
    58         return pandoc_markdown2pdf(content)
    59         return pandoc_markdown2pdf(content)
    59     return pandoc_pandoc(content, from_format, to_format, full)
     60     return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc conversion if html->html
    60 
    61 
    61 def content_or_file_name(content, file_name):
    62 def content_or_file_name(content, file_name):
    62     if not content and not file_name:
    63     if not content and not file_name:
    63         raise Exception('You should provide either a content or a file_name')
    64         raise Exception('You should provide either a content or a file_name')
    64     if content and file_name:
    65     if content and file_name:
   152     
   153     
   153 # TODO: manage images in pandoc (?)
   154 # TODO: manage images in pandoc (?)
   154 # TODO: use tidy to cleanup html
   155 # TODO: use tidy to cleanup html
   155 
   156 
   156 @memoize
   157 @memoize
   157 def pandoc_pandoc(content, from_format, to_format, full=False):
   158 def pandoc_pandoc(content, from_format, to_format, full=False, raw=False):
   158     """
   159     """
   159     Convert content (should be unicode) from from_format to to_format
   160     Convert content (should be unicode) from from_format to to_format
   160     (if full: includes header & co [html, latex])
   161     (if full: includes header & co [html, latex])
   161     Returns out (unicode), err
   162     Returns out (unicode), err
   162     
   163     
   187     
   188     
   188     input_file.write(content.encode(_PANDOC_ENCODING))
   189     input_file.write(content.encode(_PANDOC_ENCODING))
   189     input_file.close()
   190     input_file.close()
   190     
   191     
   191     # pandoc arguments and command line
   192     # pandoc arguments and command line
   192     cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name) 
   193     p_options = PANDOC_OPTIONS
       
   194     if raw:
       
   195         p_options = PANDOC_OPTIONS_RAW
       
   196                 
       
   197     cmd_args = ' %s -o %s ' %(p_options,output_temp_name) 
   193     if full:
   198     if full:
   194         cmd_args += ' -s '
   199         cmd_args += ' -s '
   195     cmd_args += ' -f %s ' % from_format
   200     cmd_args += ' -f %s ' % from_format
   196     cmd_args += ' -t %s ' % to_format
   201     cmd_args += ' -t %s ' % to_format
   197     cmd_args += ' %s ' % input_temp_name
   202     cmd_args += ' %s ' % input_temp_name