equal
deleted
inserted
replaced
3 import re |
3 import re |
4 |
4 |
5 import pexpect |
5 import pexpect |
6 |
6 |
7 from abi_error import AbiConverterError, AbiCommandError |
7 from abi_error import AbiConverterError, AbiCommandError |
|
8 from cm.converters.pandoc_converters import do_tidy |
8 |
9 |
9 |
10 |
10 TYPES_IN = {'602': '602', 'abw': 'abw', 'aw': 'aw', |
11 TYPES_IN = {'602': '602', 'abw': 'abw', 'aw': 'aw', |
11 'awt': 'awt', 'cwk': 'cwk', 'dbk': 'dbk', |
12 'awt': 'awt', 'cwk': 'cwk', 'dbk': 'dbk', |
12 'doc': 'doc', 'docm': 'docm', 'docx': 'docx', |
13 'doc': 'doc', 'docm': 'docm', 'docx': 'docx', |
300 p['style'] = s.cssText |
301 p['style'] = s.cssText |
301 |
302 |
302 except KeyError: |
303 except KeyError: |
303 p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;'; |
304 p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;'; |
304 |
305 |
305 # for some reason having DOCTYPE declaration makes soup unhappy |
306 return do_tidy(unicode(soup)) |
306 output = re.sub(r'<!(<!DOCTYPE html[^>]*>)>', r'\1', unicode(soup)) |
|
307 # And for some reason, & is not converted to &amp; from time to time! |
|
308 output = re.sub(r'&(?![A-Za-z]+[0-9]*;|#[0-9]+;|#x[0-9a-fA-F]+;)', r'&amp;', output) |
|
309 return output |
|