src/cm/converters/__init__.py
changeset 77 fe91eb717a96
parent 50 6db6c011a310
child 78 dda94db1149a
equal deleted inserted replaced
76:f35d85b2c760 77:fe91eb717a96
     1 from oo_converters import convert
       
     2 from pandoc_converters import pandoc_convert
     1 from pandoc_converters import pandoc_convert
     3 import chardet 
     2 import chardet 
       
     3 import re
     4 
     4 
     5 # TODO: move that in text_base: save images
     5 # TODO: move that in text_base: save images
     6 def convert_from_mimetype(file_name, mime_type, format):
     6 def convert_from_mimetype(file_name, mime_type, format):
     7     input = open(file_name, 'r').read()
     7     input = open(file_name, 'r').read()
     8     return _convert_from_mimetype(input, mime_type, format)
     8     return _convert_from_mimetype(input, mime_type, format)
    85         result.append(img_path)
    85         result.append(img_path)
    86         last_index = match_xhtml.end() - 1 # -1 because trailing "
    86         last_index = match_xhtml.end() - 1 # -1 because trailing "
    87     result.append(xhtml[last_index:len(xhtml)])
    87     result.append(xhtml[last_index:len(xhtml)])
    88     return u''.join(result)
    88     return u''.join(result)
    89 
    89 
    90 def convert_oo_to_html(input):    
    90 def convert_oo_to_html(input):
       
    91     from oo_converters import convert    
    91     html_input, images = convert(input, 'html')
    92     html_input, images = convert(input, 'html')
    92     
    93     
    93     enc = chardet.detect(html_input)['encoding']
    94     enc = chardet.detect(html_input)['encoding']
    94     try_encodings = [enc, 'utf8', 'latin1']
    95     try_encodings = [enc, 'utf8', 'latin1']
    95     res_content = None
    96     res_content = None
   101             pass
   102             pass
   102     if not res_content_html:
   103     if not res_content_html:
   103         raise Exception('UnicodeDecodeError: could not decode')
   104         raise Exception('UnicodeDecodeError: could not decode')
   104     return res_content_html, images
   105     return res_content_html, images
   105 
   106 
   106 def old_convert_oo_to_html(input):    
   107 def old_convert_oo_to_html(input): 
       
   108     from oo_converters import convert   
   107     html_input, images = convert(input, 'html')
   109     html_input, images = convert(input, 'html')
   108     xhtml_input, _not_used_ = convert(input, 'xhtml')
   110     xhtml_input, _not_used_ = convert(input, 'xhtml')
   109     
   111     
   110     enc = chardet.detect(xhtml_input)['encoding']
   112     enc = chardet.detect(xhtml_input)['encoding']
   111     try_encodings = [enc, 'utf8', 'latin1']
   113     try_encodings = [enc, 'utf8', 'latin1']