For some unknown reason, BeautifulSoup now needs fromEncoding='UTF-8' to be passed explicitly in some as-yet-unidentified cases.
--- a/src/cm/utils/spannifier.py Wed Jun 26 23:43:08 2013 +0200
+++ b/src/cm/utils/spannifier.py Thu Jun 27 16:39:16 2013 +0200
@@ -13,7 +13,7 @@
return not textNode.findParent('style')
def get_the_soup(input):
- return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES)
+ return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES, fromEncoding='UTF-8')
from cm.utils.cache import memoize, dj_memoize
@dj_memoize