# HG changeset patch # User gibus # Date 1344503206 -7200 # Node ID f62f7f0bcaa431389cd3e4b2195d253be76c331d # Parent b2a7c015362b274e08f3abc1053237acebb2d02f Fixed export with abiword by running tidy on html diff -r b2a7c015362b -r f62f7f0bcaa4 src/cm/converters/abi_converters.py --- a/src/cm/converters/abi_converters.py Thu Aug 09 10:11:21 2012 +0200 +++ b/src/cm/converters/abi_converters.py Thu Aug 09 11:06:46 2012 +0200 @@ -5,6 +5,7 @@ import pexpect from abi_error import AbiConverterError, AbiCommandError +from cm.converters.pandoc_converters import do_tidy TYPES_IN = {'602': '602', 'abw': 'abw', 'aw': 'aw', @@ -302,8 +303,4 @@ except KeyError: p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;'; - # for some reason having DOCTYPE declaration makes soup unhappy - output = re.sub(r']*>)>', r'\1', unicode(soup)) - # And for some reason, & is not converted to & from time to time! - output = re.sub(r'&(?![A-Za-z]+[0-9]*;|#[0-9]+;|#x[0-9a-fA-F]+;)', r'&', output) - return output + return do_tidy(unicode(soup)) diff -r b2a7c015362b -r f62f7f0bcaa4 src/cm/views/texts.py --- a/src/cm/views/texts.py Thu Aug 09 10:11:21 2012 +0200 +++ b/src/cm/views/texts.py Thu Aug 09 11:06:46 2012 +0200 @@ -302,7 +302,7 @@ Replaces (html) links to attachs with embeded inline images """ content = re.sub("%s" %settings.SITE_URL, '', content) # changes absolute urls to relative urls - attach_re = r'(?:/text/(?P\w*))?/attach/(?P\w*)/' + attach_re = r'"(?:/text/(?P\w*))?/attach/(?P\w*)/' attach_str_textversion = r'/text/%s/attach/%s/' attach_str = r'/attach/%s/' for match in re.findall(attach_re, content):