diff -r 3eb5299e8085 -r 0f0a79f7f213 src/cm/converters/__init__.py --- a/src/cm/converters/__init__.py Thu Apr 15 14:35:44 2010 +0200 +++ b/src/cm/converters/__init__.py Thu Apr 15 16:38:45 2010 +0200 @@ -2,6 +2,8 @@ import chardet from cm.utils.string_utils import to_unicode import re +from cm.converters.oo_converters import extract_css_body + # TODO: move that in text_base: save images def convert_from_mimetype(file_name, mime_type, format): @@ -19,8 +21,12 @@ 'application/msword', ]: - xhtml_input, attachs = convert_oo_to_html(input) - converted_input = pandoc_convert(xhtml_input, 'html', format) + html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input) + if format == 'html': + _not_used_css, converted_input = extract_css_body(xhtml_input) + #converted_input = xhtml_input + else: + converted_input = pandoc_convert(html_input, 'html', format) ############################## # latex @@ -88,7 +94,6 @@ enc = chardet.detect(html_input)['encoding'] try_encodings = [enc, 'utf8', 'latin1'] - res_content = None for encoding in try_encodings: try: res_content_html = unicode(html_input, encoding) @@ -99,14 +104,13 @@ raise Exception('UnicodeDecodeError: could not decode') return res_content_html, images -def old_convert_oo_to_html(input): +def convert_oo_to_html_and_xhtml(input): from oo_converters import convert html_input, images = convert(input, 'html') xhtml_input, _not_used_ = convert(input, 'xhtml') enc = chardet.detect(xhtml_input)['encoding'] try_encodings = [enc, 'utf8', 'latin1'] - res_content = None for encoding in try_encodings: try: # TODO: fix path and manage images @@ -120,8 +124,11 @@ pass if not res_content_html or not res_content_xhtml: raise Exception('UnicodeDecodeError: could not decode') - return res_content_html, res_content_xhtml, images + return res_content_html, cleanup(res_content_xhtml), images +def cleanup(string): + return string.replace(u'\xc2\xa0',u'') + def markdown_from_code(code): CODE_INDICATOR = " " # 4 spaces return '\n'.join([CODE_INDICATOR + line for line in code.split('\n')])