11 import StringIO |
11 import StringIO |
12 import tidy |
12 import tidy |
13 from cm.utils.string_utils import to_unicode |
13 from cm.utils.string_utils import to_unicode |
14 from xml.dom.minidom import parseString |
14 from xml.dom.minidom import parseString |
15 import re |
15 import re |
|
16 from distutils.version import LooseVersion |
16 |
17 |
import commands

# make sure binaries are available
from cm.utils.system import bin_search

PANDOC_BIN = "pandoc"

# Look the pandoc binary up *before* asking it for its version, so that a
# missing binary is handled by bin_search instead of leaking shell error
# text into PANDOC_VERSION.
# NOTE(review): presumably bin_search reports/raises on a missing binary --
# confirm against cm.utils.system.
bin_search(PANDOC_BIN)


def _detect_pandoc_version(pandoc_bin):
    """Return the version string printed by ``<pandoc_bin> -v``.

    The version is the second whitespace-separated field of the first
    output line shaped like ``<program> <version>`` (the same field the
    former ``head -n 1 | awk '{print $2;}'`` pipeline extracted), with the
    extra requirement that it starts with a digit.

    Returns '' when no such line is found; LooseVersion('') sorts below
    every real version, so callers fall back to the legacy (< 1.8) options.
    """
    output = commands.getstatusoutput(pandoc_bin + " -v")[1]
    match = re.search(r'^\S+\s+(\d\S*)', output, re.MULTILINE)
    if match:
        return match.group(1)
    return ''


PANDOC_VERSION = _detect_pandoc_version(PANDOC_BIN)
_PANDOC_LOOSE_VERSION = LooseVersion(PANDOC_VERSION)

# --sanitize-html is only passed to pandoc older than 1.8
if _PANDOC_LOOSE_VERSION < LooseVersion('1.8'):
    PANDOC_OPTIONS = " --sanitize-html --email-obfuscation=none "
else:
    PANDOC_OPTIONS = " --email-obfuscation=none "

PANDOC_OPTIONS_RAW = " -R --email-obfuscation=none "

# markdown2pdf is only used with pandoc older than 1.9; from 1.9 on
# MARKDOWN2PDF_BIN is None and users of this constant must check for that.
if _PANDOC_LOOSE_VERSION < LooseVersion('1.9'):
    MARKDOWN2PDF_BIN = "markdown2pdf"
else:
    MARKDOWN2PDF_BIN = None

if MARKDOWN2PDF_BIN:
    bin_search(MARKDOWN2PDF_BIN)

# pandoc capabilities
INPUT_FORMATS = ['native', 'markdown', 'rst', 'html', 'latex']
OUTPUT_FORMATS = ['native', 'html', 's5', 'docbook', 'opendocument', 'odt', 'latex', 'context', 'texinfo', 'man', 'markdown', 'rst', 'mediawiki', 'rtf', 'pdf']

# input formats
CHOICES_INPUT_FORMATS = [(f, f) for f in ['markdown', 'rst', 'html']]

DEFAULT_INPUT_FORMAT = 'markdown'
53 content = do_tidy(content) |
61 content = do_tidy(content) |
54 except: |
62 except: |
55 # tidy fails ... try pandoc anyway... |
63 # tidy fails ... try pandoc anyway... |
56 content = to_unicode(content) |
64 content = to_unicode(content) |
57 # if to_format is pdf: use markdown2pdf |
65 # if to_format is pdf: use markdown2pdf |
58 if to_format == 'pdf': |
66 if MARKDOWN2PDF_BIN and to_format == 'pdf': |
59 if from_format != 'markdown': |
67 if from_format != 'markdown': |
60 content = pandoc_convert(content, from_format, 'markdown', True) |
68 content = pandoc_convert(content, from_format, 'markdown', True) |
61 return pandoc_markdown2pdf(content) |
69 return pandoc_markdown2pdf(content) |
62 return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc convertion if html->html |
70 return pandoc_pandoc(content, from_format, to_format, full, from_format==to_format=='html') # use raw pandoc convertion if html->html |
63 |
71 |
189 if type(content) != unicode: |
197 if type(content) != unicode: |
190 raise Exception('Content is not in unicode format!') |
198 raise Exception('Content is not in unicode format!') |
191 |
199 |
192 # temp file |
200 # temp file |
193 input_file, input_temp_name = get_filetemp('w', 'input') |
201 input_file, input_temp_name = get_filetemp('w', 'input') |
194 output_temp_fp, output_temp_name = get_filetemp('r', 'output') |
202 # For some reason when pandoc > 1.9 converts to PDF, '-t' shouldn't be used but output file name extension has to be '.pdf' |
|
203 if to_format != 'pdf': |
|
204 output_temp_fp, output_temp_name = get_filetemp('r', 'output') |
|
205 else: |
|
206 output_temp_fp, output_temp_name = get_filetemp('r', 'output.pdf') |
195 output_temp_fp.close() |
207 output_temp_fp.close() |
196 |
208 |
197 error_temp_fp, error_temp_name = get_filetemp('w', 'err') |
209 error_temp_fp, error_temp_name = get_filetemp('w', 'err') |
198 error_temp_fp.close() |
210 error_temp_fp.close() |
199 |
211 |
229 |
241 |
230 cmd_args = ' %s -o %s ' %(p_options,output_temp_name) |
242 cmd_args = ' %s -o %s ' %(p_options,output_temp_name) |
231 if full: |
243 if full: |
232 cmd_args += ' -s ' |
244 cmd_args += ' -s ' |
233 cmd_args += ' -f %s ' % from_format |
245 cmd_args += ' -f %s ' % from_format |
234 cmd_args += ' -t %s ' % to_format |
246 if to_format != 'pdf': |
|
247 cmd_args += ' -t %s ' % to_format |
235 cmd_args += ' %s ' % input_temp_name |
248 cmd_args += ' %s ' % input_temp_name |
236 cmd = PANDOC_BIN + ' ' + cmd_args |
249 cmd = PANDOC_BIN + ' ' + cmd_args |
237 |
250 |
238 #from socommons.converters.new_conv import controlled_Popen |
251 #from socommons.converters.new_conv import controlled_Popen |
239 #controlled_Popen(cmd, stderr=file(error_temp_name,'w')) |
252 #controlled_Popen(cmd, stderr=file(error_temp_name,'w')) |