1 from pandoc_converters import pandoc_convert |
1 from pandoc_converters import pandoc_convert |
2 import chardet |
2 import chardet |
3 from cm.utils.string_utils import to_unicode |
3 from cm.utils.string_utils import to_unicode |
4 import re |
4 import re |
5 import os |
5 import os |
6 from cm.converters.oo_converters import extract_css_body |
6 from oo_converters import extract_css_body |
7 |
7 |
8 |
8 |
9 # TODO: move that in text_base: save images |
9 # TODO: move that in text_base: save images |
10 def convert_from_mimetype(file_name, mime_type, format): |
10 def convert_from_mimetype(file_name, mime_type, format): |
11 input = open(file_name, 'r').read() |
11 input = open(file_name, 'r').read() |
16 #input = to_unicode(input) |
16 #input = to_unicode(input) |
17 |
17 |
18 attachs = [] |
18 attachs = [] |
19 attachs_dir = None |
19 attachs_dir = None |
20 ############################## |
20 ############################## |
|
21 # OO/MS-Word |
21 if mime_type in ['application/vnd.oasis.opendocument.text', |
22 if mime_type in ['application/vnd.oasis.opendocument.text', |
22 'application/msword', |
23 'application/msword', |
|
24 'application/vnd.openxmlformats-officedocument.wordprocessingml.document' |
23 ]: |
25 ]: |
24 |
26 |
25 html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input) |
27 from cm.cm_settings import USE_ABI |
26 if format == 'html': |
28 if USE_ABI: |
|
29 from abi_converters import AbiFileConverter |
|
30 converter = AbiFileConverter() |
|
31 html_input, attachs = converter.convert_to_html(input) |
|
32 html_input = re.sub(r' awml:style="[^"]*"', '', html_input) |
|
33 converted_input = pandoc_convert(html_input, 'html', format) |
|
34 else: |
|
35 html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input) |
|
36 if format == 'html': |
27 _not_used_css, converted_input = extract_css_body(xhtml_input) |
37 _not_used_css, converted_input = extract_css_body(xhtml_input) |
28 #converted_input = xhtml_input |
38 #converted_input = xhtml_input |
29 |
39 |
30 converted_input = pandoc_convert(html_input, 'html', format) |
40 converted_input = pandoc_convert(html_input, 'html', format) |
31 |
41 |
32 ############################## |
42 ############################## |
33 # latex |
43 # latex |
34 elif mime_type in ['application/x-latex','text/x-tex',]: |
44 elif mime_type in ['application/x-latex','text/x-tex',]: |
35 converted_input = pandoc_convert(to_unicode(input), 'latex', format) |
45 converted_input = pandoc_convert(to_unicode(input), 'latex', format) |