Fixed export with abiword by running tidy on html
author gibus
Thu, 09 Aug 2012 11:06:46 +0200
changeset 457 f62f7f0bcaa4
parent 456 b2a7c015362b
child 458 ba7e05582435
Fixed export with abiword by running tidy on html
src/cm/converters/abi_converters.py
src/cm/views/texts.py
--- a/src/cm/converters/abi_converters.py	Thu Aug 09 10:11:21 2012 +0200
+++ b/src/cm/converters/abi_converters.py	Thu Aug 09 11:06:46 2012 +0200
@@ -5,6 +5,7 @@
 import pexpect
 
 from abi_error import AbiConverterError, AbiCommandError
+from cm.converters.pandoc_converters import do_tidy
 
 
 TYPES_IN  = {'602': '602',       'abw': 'abw',       'aw': 'aw',     
@@ -302,8 +303,4 @@
           except KeyError:
             p['style'] = 'margin-top: 10pt; margin-bottom: 10pt;';
 
-        # for some reason having DOCTYPE declaration makes soup unhappy
-        output = re.sub(r'<!(<!DOCTYPE html[^>]*>)>', r'\1', unicode(soup))
-        # And for some reason, & is not converted to &amp; from time to time!
-        output = re.sub(r'&(?![A-Za-z]+[0-9]*;|#[0-9]+;|#x[0-9a-fA-F]+;)', r'&amp;', output)
-        return output
+        return do_tidy(unicode(soup))
--- a/src/cm/views/texts.py	Thu Aug 09 10:11:21 2012 +0200
+++ b/src/cm/views/texts.py	Thu Aug 09 11:06:46 2012 +0200
@@ -302,7 +302,7 @@
   Replaces (html) links to attachs with embeded inline images
   """
   content = re.sub("%s" %settings.SITE_URL, '', content) # changes absolute urls to relative urls
-  attach_re = r'(?:/text/(?P<key>\w*))?/attach/(?P<attach_key>\w*)/'
+  attach_re = r'"(?:/text/(?P<key>\w*))?/attach/(?P<attach_key>\w*)/'
   attach_str_textversion = r'/text/%s/attach/%s/'
   attach_str = r'/attach/%s/'
   for match in re.findall(attach_re, content):