def viewitems(self):
    """Return a set-like ItemsView object over this mapping's items."""
    items_view = ItemsView(self)
    return items_view
343 ## end of http://code.activestate.com/recipes/576693/ }}} |
347 ## end of http://code.activestate.com/recipes/576693/ }}} |
344 |
348 |
def remove_accents(lne):
    """Return *lne* as unicode text with combining marks removed.

    The value is converted to text, decomposed with NFKD normalization, and
    every code point that ``unicodedata.combining`` reports as a combining
    character is dropped (so an accented "e" comes back as a plain "e").
    """
    # The ``unicode`` builtin only exists on Python 2; on Python 3 ``str`` is
    # already the unicode text type, so fall back to it instead of raising
    # NameError.
    try:
        text_type = unicode
    except NameError:
        text_type = str
    decomposed = unicodedata.normalize('NFKD', text_type(lne))
    return u"".join(c for c in decomposed if not unicodedata.combining(c))
348 |
352 |
def normalize(lne):
    """Return *lne* accent-stripped and lower-cased, with the oe ligature
    replaced by the two letters "oe"."""
    lowered = remove_accents(lne).lower()
    return lowered.replace(u"œ", u"oe")
|
355 |
|
def sanitize(line, separator = '-', ascii_only = True):
    """Turn *line* into a lower-case, separator-delimited slug.

    The text is transliterated with ``unidecode``, lower-cased, stripped of
    every character that is not the separator, a-z, 0-9 or whitespace, and
    then each run of separators/whitespace is collapsed to one separator.
    Leading and trailing separator characters are removed.

    :param line: text to sanitize; any falsy value yields ``''``.
    :param separator: string placed between the remaining words.
    :param ascii_only: accepted for interface compatibility; it is not read
        by this function (transliteration is always applied).
    :return: the sanitized text, or ``''`` when *line* is falsy.
    """
    if not line:
        return ''

    # Escape the separator with re.escape: simply prefixing it with a
    # backslash turns separators such as 'd', 'w' or 'n' into regex
    # classes/escapes (\d, \w, \n) and mangles the character class.
    sep_class = re.escape(separator)

    # Transliterate non-ASCII characters
    line = unidecode.unidecode(line)
    # Remove all characters that are not the separator, a-z, 0-9, or whitespace
    line = re.sub(r'[^%sa-z0-9\s]+' % sep_class, '', line.lower())
    # Replace all separator characters and whitespace by a single separator.
    # A callable replacement keeps a backslash in the separator from being
    # interpreted as a replacement-template escape.
    line = re.sub(r'[%s\s]+' % sep_class, lambda _match: separator, line)

    return line.strip(separator)
|
369 |
351 |
370 |
352 def show_progress(current_line, total_line, label, width, writer=None): |
371 def show_progress(current_line, total_line, label, width, writer=None): |
353 |
372 |
354 if writer is None: |
373 if writer is None: |
355 writer = sys.stdout |
374 writer = sys.stdout |