--- a/src/cm/utils/spannifier.py Tue Jun 12 14:00:32 2012 +0200
+++ b/src/cm/utils/spannifier.py Thu Jun 14 11:43:46 2012 +0200
@@ -35,6 +35,11 @@
span_starts[i] = len(''.join(textNodes_content))
textNodes_content.append(textNode.string)
output = unicode(soup)
+ # Soup has introduced HTML entities; expand them back to the literal characters
+ output = output.replace("&quot;", '"')
+ output = output.replace("&gt;", '>')
+ output = output.replace("&lt;", '<')
+ output = output.replace("&amp;", '&')  # last, so "&amp;lt;" ends up as "&lt;", not "<"
textualized = ''.join(textNodes_content)
return output, textualized, span_starts