src/cm/converters/abi_converters.py
changeset 457 f62f7f0bcaa4
parent 456 b2a7c015362b
child 555 5d79dc4e50a3
equal deleted inserted replaced
456:b2a7c015362b 457:f62f7f0bcaa4
     3 import re
     3 import re
     4 
     4 
     5 import pexpect
     5 import pexpect
     6 
     6 
     7 from abi_error import AbiConverterError, AbiCommandError
     7 from abi_error import AbiConverterError, AbiCommandError
       
     8 from cm.converters.pandoc_converters import do_tidy
     8 
     9 
     9 
    10 
    10 TYPES_IN  = {'602': '602',       'abw': 'abw',       'aw': 'aw',     
    11 TYPES_IN  = {'602': '602',       'abw': 'abw',       'aw': 'aw',     
    11              'awt': 'awt',       'cwk': 'cwk',       'dbk': 'dbk',   
    12              'awt': 'awt',       'cwk': 'cwk',       'dbk': 'dbk',   
    12              'doc': 'doc',       'docm': 'docm',     'docx': 'docx', 
    13              'doc': 'doc',       'docm': 'docm',     'docx': 'docx', 
   300             p['style'] = s.cssText
   301             p['style'] = s.cssText
   301 
   302 
   302           except KeyError:
   303           except KeyError:
   303             p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;';
   304             p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;';
   304 
   305 
    305         # for some reason having a DOCTYPE declaration makes soup unhappy
   306         return do_tidy(unicode(soup))
   306         output = re.sub(r'<!(<!DOCTYPE html[^>]*>)>', r'\1', unicode(soup))
       
   307         # And for some reason, & is not converted to &amp; from time to time!
       
   308         output = re.sub(r'&(?![A-Za-z]+[0-9]*;|#[0-9]+;|#x[0-9a-fA-F]+;)', r'&amp;', output)
       
   309         return output