src/hdabo/utils.py
changeset 443 27f71b0a772d
parent 266 825ff4d6a8ac
equal deleted inserted replaced
442:3d54acec55d6 443:27f71b0a772d
     1 # -*- coding: utf-8 -*-
     1 # -*- coding: utf-8 -*-
       
     2 import codecs
     2 import collections
     3 import collections
       
     4 import math
       
     5 import re
       
     6 import sys
     3 import unicodedata
     7 import unicodedata
     4 import sys
     8 
     5 import math
     9 import unidecode
     6 import codecs
    10 
     7 
    11 
     8 ###
    12 ###
     9 # allow to declare a property as a decorator
    13 # allow to declare a property as a decorator
    10 ###
    14 ###
    11 def Property(func):
    15 def Property(func):
    def viewitems(self):
        "od.viewitems() -> a set-like object providing a view on od's items"
        # Hand this mapping to ItemsView, which supplies the view object
        # described in the docstring.
        # NOTE(review): ItemsView is not defined in the visible part of the
        # file -- presumably imported by the recipe; verify.
        return ItemsView(self)
   343 ## end of http://code.activestate.com/recipes/576693/ }}}
   347 ## end of http://code.activestate.com/recipes/576693/ }}}
   344 
   348 
   345 def remove_accents(str):
   349 def remove_accents(lne):
   346     nkfd_form = unicodedata.normalize('NFKD', unicode(str))
   350     nkfd_form = unicodedata.normalize('NFKD', unicode(lne))
   347     return u"".join([c for c in nkfd_form if not unicodedata.combining(c)])
   351     return u"".join([c for c in nkfd_form if not unicodedata.combining(c)])
   348 
   352 
   349 def normalize(str):
   353 def normalize(lne):
   350     return remove_accents(str).lower().replace(u"œ",u"oe")
   354     return remove_accents(lne).lower().replace(u"œ",u"oe")
       
   355 
       
   356 def sanitize(line, separator = '-', ascii_only = True):
       
   357 
       
   358     if not line:
       
   359         return ''
       
   360     
       
   361     #Transliterate non-ASCII characters
       
   362     line =  unidecode.unidecode(line)
       
   363     #Remove all characters that are not the separator, a-z, 0-9, or whitespace
       
   364     line = re.sub('[^\%sa-z0-9\s]+'%separator, '', line.lower())
       
   365     #// Replace all separator characters and whitespace by a single separator
       
   366     line = re.sub('[\%s\s]+' % separator, separator, line)
       
   367 
       
   368     return line.strip(separator)
       
   369 
   351 
   370 
   352 def show_progress(current_line, total_line, label, width, writer=None):
   371 def show_progress(current_line, total_line, label, width, writer=None):
   353 
   372 
   354     if writer is None:
   373     if writer is None:
   355         writer = sys.stdout
   374         writer = sys.stdout