equal
deleted
inserted
replaced
8 from subprocess import Popen, PIPE, call |
8 from subprocess import Popen, PIPE, call |
9 import os |
9 import os |
10 from tempfile import mkstemp |
10 from tempfile import mkstemp |
11 import StringIO |
11 import StringIO |
12 import tidy |
12 import tidy |
13 |
13 from cm.utils.string import to_unicode |
14 |
14 |
15 PANDOC_BIN = "pandoc" |
15 PANDOC_BIN = "pandoc" |
16 PANDOC_OPTIONS = "--sanitize-html " |
16 PANDOC_OPTIONS = "--sanitize-html " |
17 |
17 |
18 MARKDOWN2PDF_BIN = "markdown2pdf" |
18 MARKDOWN2PDF_BIN = "markdown2pdf" |
44 >>> res = pandoc_convert('<span>dssd', 'html', 'pdf') |
44 >>> res = pandoc_convert('<span>dssd', 'html', 'pdf') |
45 """ |
45 """ |
46 # pandoc does not react well when html is not valid |
46 # pandoc does not react well when html is not valid |
47 # use tidy to clean html |
47 # use tidy to clean html |
48 if from_format == 'html': |
48 if from_format == 'html': |
49 content = do_tidy(content) |
49 try: |
|
50 content = do_tidy(content) |
|
51 except: |
|
52 # tidy fails ... try pandoc anyway... |
|
53 content = to_unicode(content) |
50 # if to_format is pdf: use markdown2pdf |
54 # if to_format is pdf: use markdown2pdf |
51 if to_format == 'pdf': |
55 if to_format == 'pdf': |
52 if from_format != 'markdown': |
56 if from_format != 'markdown': |
53 content = pandoc_convert(content, from_format, 'markdown', True) |
57 content = pandoc_convert(content, from_format, 'markdown', True) |
54 return pandoc_markdown2pdf(content) |
58 return pandoc_markdown2pdf(content) |
81 indent=0, |
85 indent=0, |
82 tidy_mark=0, |
86 tidy_mark=0, |
83 input_encoding='utf8', |
87 input_encoding='utf8', |
84 output_encoding='utf8', |
88 output_encoding='utf8', |
85 ) |
89 ) |
86 tidyied_content = tidy.parseString(content.encode('utf8'), **tidy_options) |
90 tidyied_content = tidy.parseString(to_unicode(content).encode('utf8'), **tidy_options) |
87 tidyied_content = str(tidyied_content) |
91 tidyied_content = str(tidyied_content) |
88 if content and not tidyied_content.strip(): |
92 if content and not tidyied_content.strip(): |
89 raise Exception('Content could not be tidyfied') |
93 raise Exception('Content could not be tidyfied') |
90 return str(tidyied_content).decode('utf8') |
94 return str(tidyied_content).decode('utf8') |
91 |
95 |