For some reason, BeautifulSoup now needs fromEncoding='UTF-8' in some unidentified cases.
author gibus
Thu, 27 Jun 2013 16:39:16 +0200
changeset 516 c6105d922ac6
parent 515 0be906e586e6
child 517 e7bc083fa9dc
For some reason, BeautifulSoup now needs fromEncoding='UTF-8' in some unidentified cases.
src/cm/utils/spannifier.py
--- a/src/cm/utils/spannifier.py	Wed Jun 26 23:43:08 2013 +0200
+++ b/src/cm/utils/spannifier.py	Thu Jun 27 16:39:16 2013 +0200
@@ -13,7 +13,7 @@
     return not textNode.findParent('style') 
 
 def get_the_soup(input):
-    return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES)
+    return BeautifulSoup(input, convertEntities=BeautifulSoup.ALL_ENTITIES, fromEncoding='UTF-8')
      
 from cm.utils.cache import memoize, dj_memoize
 @dj_memoize