11 import StringIO |
11 import StringIO |
12 import tidy |
12 import tidy |
13 from cm.utils.string_utils import to_unicode |
13 from cm.utils.string_utils import to_unicode |
14 |
14 |
15 PANDOC_BIN = "pandoc" |
15 PANDOC_BIN = "pandoc" |
16 PANDOC_OPTIONS = " -R " |
16 PANDOC_OPTIONS = " --sanitize-html " |
|
17 PANDOC_OPTIONS_RAW = " -R " |
17 |
18 |
18 MARKDOWN2PDF_BIN = "markdown2pdf" |
19 MARKDOWN2PDF_BIN = "markdown2pdf" |
19 |
20 |
20 # make sure binaries are available |
21 # make sure binaries are available |
21 from cm.utils.system import bin_search |
22 from cm.utils.system import bin_search |
35 DEFAULT_INPUT_FORMAT = 'markdown' |
36 DEFAULT_INPUT_FORMAT = 'markdown' |
36 |
37 |
37 _PANDOC_ENCODING = 'utf8' |
38 _PANDOC_ENCODING = 'utf8' |
38 |
39 |
39 @memoize |
40 @memoize |
40 def pandoc_convert(content, from_format, to_format, full=False): |
41 def pandoc_convert(content, from_format, to_format, full=False, raw=False): |
41 """ |
42 """ |
42 Convert markdown content to pdf |
43 Convert markdown content to pdf |
43 |
44 |
44 >>> res = pandoc_convert('<span>dssd', 'html', 'pdf') |
45 >>> res = pandoc_convert('<span>dssd', 'html', 'pdf') |
45 """ |
46 """ |
54 # if to_format is pdf: use markdown2pdf |
55 # if to_format is pdf: use markdown2pdf |
55 if to_format == 'pdf': |
56 if to_format == 'pdf': |
56 if from_format != 'markdown': |
57 if from_format != 'markdown': |
57 content = pandoc_convert(content, from_format, 'markdown', True) |
58 content = pandoc_convert(content, from_format, 'markdown', True) |
58 return pandoc_markdown2pdf(content) |
59 return pandoc_markdown2pdf(content) |
59 return pandoc_pandoc(content, from_format, to_format, full) |
60 return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc conversion if html->html |
60 |
61 |
61 def content_or_file_name(content, file_name): |
62 def content_or_file_name(content, file_name): |
62 if not content and not file_name: |
63 if not content and not file_name: |
63 raise Exception('You should provide either a content or a file_name') |
64 raise Exception('You should provide either a content or a file_name') |
64 if content and file_name: |
65 if content and file_name: |
152 |
153 |
153 # TODO: manage images in pandoc (?) |
154 # TODO: manage images in pandoc (?) |
154 # TODO: use tidy to clean up HTML |
155 # TODO: use tidy to clean up HTML |
155 |
156 |
156 @memoize |
157 @memoize |
157 def pandoc_pandoc(content, from_format, to_format, full=False): |
158 def pandoc_pandoc(content, from_format, to_format, full=False, raw=False): |
158 """ |
159 """ |
159 Convert content (should be unicode) from from_format to to_format |
160 Convert content (should be unicode) from from_format to to_format |
160 (if full: includes header & co [html, latex]) |
161 (if full: includes header & co [html, latex]) |
161 Returns out (unicode), err |
162 Returns out (unicode), err |
162 |
163 |
187 |
188 |
188 input_file.write(content.encode(_PANDOC_ENCODING)) |
189 input_file.write(content.encode(_PANDOC_ENCODING)) |
189 input_file.close() |
190 input_file.close() |
190 |
191 |
191 # pandoc arguments and command line |
192 # pandoc arguments and command line |
192 cmd_args = ' %s -o %s ' %(PANDOC_OPTIONS,output_temp_name) |
193 p_options = PANDOC_OPTIONS |
|
194 if raw: |
|
195 p_options = PANDOC_OPTIONS_RAW |
|
196 |
|
197 cmd_args = ' %s -o %s ' %(p_options,output_temp_name) |
193 if full: |
198 if full: |
194 cmd_args += ' -s ' |
199 cmd_args += ' -s ' |
195 cmd_args += ' -f %s ' % from_format |
200 cmd_args += ' -f %s ' % from_format |
196 cmd_args += ' -t %s ' % to_format |
201 cmd_args += ' -t %s ' % to_format |
197 cmd_args += ' %s ' % input_temp_name |
202 cmd_args += ' %s ' % input_temp_name |