equal
deleted
inserted
replaced
1 from oo_converters import convert |
|
2 from pandoc_converters import pandoc_convert |
1 from pandoc_converters import pandoc_convert |
3 import chardet |
2 import chardet |
|
3 import re |
4 |
4 |
5 # TODO: move that in text_base: save images |
5 # TODO: move that in text_base: save images |
6 def convert_from_mimetype(file_name, mime_type, format): |
6 def convert_from_mimetype(file_name, mime_type, format): |
7 input = open(file_name, 'r').read() |
7 input = open(file_name, 'r').read() |
8 return _convert_from_mimetype(input, mime_type, format) |
8 return _convert_from_mimetype(input, mime_type, format) |
85 result.append(img_path) |
85 result.append(img_path) |
86 last_index = match_xhtml.end() - 1 # -1 because trailing " |
86 last_index = match_xhtml.end() - 1 # -1 because trailing " |
87 result.append(xhtml[last_index:len(xhtml)]) |
87 result.append(xhtml[last_index:len(xhtml)]) |
88 return u''.join(result) |
88 return u''.join(result) |
89 |
89 |
90 def convert_oo_to_html(input): |
90 def convert_oo_to_html(input): |
|
91 from oo_converters import convert |
91 html_input, images = convert(input, 'html') |
92 html_input, images = convert(input, 'html') |
92 |
93 |
93 enc = chardet.detect(html_input)['encoding'] |
94 enc = chardet.detect(html_input)['encoding'] |
94 try_encodings = [enc, 'utf8', 'latin1'] |
95 try_encodings = [enc, 'utf8', 'latin1'] |
95 res_content = None |
96 res_content = None |
101 pass |
102 pass |
102 if not res_content_html: |
103 if not res_content_html: |
103 raise Exception('UnicodeDecodeError: could not decode') |
104 raise Exception('UnicodeDecodeError: could not decode') |
104 return res_content_html, images |
105 return res_content_html, images |
105 |
106 |
106 def old_convert_oo_to_html(input): |
107 def old_convert_oo_to_html(input): |
|
108 from oo_converters import convert |
107 html_input, images = convert(input, 'html') |
109 html_input, images = convert(input, 'html') |
108 xhtml_input, _not_used_ = convert(input, 'xhtml') |
110 xhtml_input, _not_used_ = convert(input, 'xhtml') |
109 |
111 |
110 enc = chardet.detect(xhtml_input)['encoding'] |
112 enc = chardet.detect(xhtml_input)['encoding'] |
111 try_encodings = [enc, 'utf8', 'latin1'] |
113 try_encodings = [enc, 'utf8', 'latin1'] |