src/cm/converters/__init__.py
changeset 360 bfaab8740995
parent 259 0371caf8bcc6
child 416 5573b959131d
equal deleted inserted replaced
359:0bab4ef95bfe 360:bfaab8740995
     1 from pandoc_converters import pandoc_convert
     1 from pandoc_converters import pandoc_convert
     2 import chardet 
     2 import chardet 
     3 from cm.utils.string_utils import to_unicode 
     3 from cm.utils.string_utils import to_unicode 
     4 import re
     4 import re
     5 import os
     5 import os
     6 from cm.converters.oo_converters import extract_css_body
     6 from oo_converters import extract_css_body
     7 
     7 
     8 
     8 
     9 # TODO: move that into text_base: save images
     9 # TODO: move that into text_base: save images
    10 def convert_from_mimetype(file_name, mime_type, format):
    10 def convert_from_mimetype(file_name, mime_type, format):
    11     input = open(file_name, 'r').read()
    11     input = open(file_name, 'r').read()
    16     #input = to_unicode(input)
    16     #input = to_unicode(input)
    17         
    17         
    18     attachs = []
    18     attachs = []
    19     attachs_dir = None
    19     attachs_dir = None
    20     ##############################
    20     ##############################
       
    21     # OO/MS-Word
    21     if mime_type in ['application/vnd.oasis.opendocument.text',
    22     if mime_type in ['application/vnd.oasis.opendocument.text',
    22                      'application/msword',
    23                      'application/msword',
       
    24                      'application/vnd.openxmlformats-officedocument.wordprocessingml.document'
    23                      ]:
    25                      ]:
    24         
    26         
    25         html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input)
    27         from cm.cm_settings import USE_ABI
    26         if format == 'html':
    28         if USE_ABI:
       
    29           from abi_converters import AbiFileConverter
       
    30           converter = AbiFileConverter()
       
    31           html_input, attachs = converter.convert_to_html(input)
       
    32           html_input = re.sub(r' awml:style="[^"]*"', '', html_input)
       
    33           converted_input = pandoc_convert(html_input, 'html', format)
       
    34         else:
       
    35           html_input, xhtml_input, attachs = convert_oo_to_html_and_xhtml(input)
       
    36           if format == 'html':
    27                 _not_used_css, converted_input = extract_css_body(xhtml_input)
    37                 _not_used_css, converted_input = extract_css_body(xhtml_input)
    28                 #converted_input = xhtml_input
    38                 #converted_input = xhtml_input
    29         
    39         
    30         converted_input = pandoc_convert(html_input, 'html', format)
    40           converted_input = pandoc_convert(html_input, 'html', format)
    31         
    41         
    32     ##############################
    42     ##############################
    33     # latex
    43     # latex
    34     elif mime_type in ['application/x-latex','text/x-tex',]:
    44     elif mime_type in ['application/x-latex','text/x-tex',]:
    35         converted_input = pandoc_convert(to_unicode(input), 'latex', format)
    45         converted_input = pandoc_convert(to_unicode(input), 'latex', format)