diff -r 5387c032df35 -r 81fa74c112b8 src/cm/utils/spannifier.py --- a/src/cm/utils/spannifier.py Tue Jun 12 14:00:32 2012 +0200 +++ b/src/cm/utils/spannifier.py Thu Jun 14 11:43:46 2012 +0200 @@ -35,6 +35,11 @@ span_starts[i] = len(''.join(textNodes_content)) textNodes_content.append(textNode.string) output = unicode(soup) + # Soup has introduced HTML entities, which should be expanded + output =re.sub(r"&quot;", '"', output) + output =re.sub(r"&amp;", '&', output) + output =re.sub(r"&gt;", '>', output) + output =re.sub(r"&lt;", '<', output) textualized = ''.join(textNodes_content) return output, textualized, span_starts