src/hdabo/utils.py
author rougeronj
Fri, 13 Mar 2015 15:11:09 +0100
changeset 540 dcea08e78780
parent 443 27f71b0a772d
permissions -rw-r--r--
update text, html, and css of footer pages
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
8f84b9bbd22d add wikipedia link edition
ymh <ymh.work@gmail.com>
parents: 11
diff changeset
     1
# -*- coding: utf-8 -*-
443
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     2
import codecs
11
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     3
import collections
443
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     4
import math
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     5
import re
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     6
import sys
56
e70cbbc093cc improve tag letter list
ymh <ymh.work@gmail.com>
parents: 49
diff changeset
     7
import unicodedata
443
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     8
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
     9
import unidecode
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
    10
0
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
###
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
# allows a property to be declared with a decorator
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
###
896db0083b76 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
def Property(func):
    '''Build a ``property`` from the keyword arguments returned by *func*.

    *func* is called once, with no arguments, and must return a mapping whose
    keys are valid ``property()`` keyword names (``fget``, ``fset``, ``fdel``,
    ``doc``).  The resulting property object is returned, so this can be used
    as a decorator on a function defined inside a class body.
    '''
    accessors = func()
    return property(**accessors)
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    17
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    18
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    19
## {{{ http://code.activestate.com/recipes/576694/ (r7)
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    20
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    21
KEY, PREV, NEXT = range(3)              # field indexes inside a linked-list node

# The MutableSet ABC lives in collections.abc since Python 3.3 and the
# collections.MutableSet alias was removed in Python 3.10; fall back to the
# legacy location when collections.abc does not exist (Python 2).
try:
    from collections.abc import MutableSet as _MutableSet
except ImportError:
    _MutableSet = collections.MutableSet


class OrderedSet(_MutableSet):
    '''Mutable set that iterates over its keys in insertion order.

    Keys are kept in a circular doubly linked list of ``[key, prev, next]``
    nodes anchored on the sentinel ``self.end``; ``self.map`` maps each key
    to its node so membership tests, additions and removals need no scan.
    '''

    def __init__(self, iterable=None):
        '''Create the set, optionally filled with the items of *iterable*.'''
        self.end = end = []
        end += [None, end, end]         # sentinel node for doubly linked list
        self.map = {}                   # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        '''Return the number of keys in the set.'''
        return len(self.map)

    def __contains__(self, key):
        '''Return True if *key* is in the set.'''
        return key in self.map

    def add(self, key):
        '''Append *key* at the end of the set; do nothing if already present.'''
        if key not in self.map:
            end = self.end
            curr = end[PREV]
            # Link the new node between the current last node and the sentinel.
            curr[NEXT] = end[PREV] = self.map[key] = [key, curr, end]

    def discard(self, key):
        '''Remove *key* from the set if present; a missing key is ignored.'''
        if key in self.map:
            key, prev_node, next_node = self.map.pop(key)
            # Unlink the node by joining its two neighbours together.
            prev_node[NEXT] = next_node
            next_node[PREV] = prev_node

    def __iter__(self):
        '''Yield the keys from the first added to the last added.'''
        end = self.end
        curr = end[NEXT]
        while curr is not end:
            yield curr[KEY]
            curr = curr[NEXT]

    def __reversed__(self):
        '''Yield the keys from the last added to the first added.'''
        end = self.end
        curr = end[PREV]
        while curr is not end:
            yield curr[KEY]
            curr = curr[PREV]

    def pop(self, last=True):
        '''Remove and return the last key, or the first one if *last* is false.

        Raises KeyError when the set is empty.
        '''
        if not self:
            raise KeyError('set is empty')
        key = next(reversed(self)) if last else next(iter(self))
        self.discard(key)
        return key

    def __repr__(self):
        '''Return ``ClassName()`` when empty, else ``ClassName([keys...])``.'''
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        '''Compare with *other*.

        The comparison is order-sensitive against another OrderedSet and
        order-insensitive (plain set equality) against any other iterable.
        '''
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        return set(self) == set(other)

    def __del__(self):
        self.clear()                    # remove circular references
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    83
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    84
            
143ab88d17f8 add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    85
## end of http://code.activestate.com/recipes/576694/ }}}
24
8f84b9bbd22d add wikipedia link edition
ymh <ymh.work@gmail.com>
parents: 11
diff changeset
    86
46
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    87
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    88
## {{{ http://code.activestate.com/recipes/576693/ (r9)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    89
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    90
# Passes Python2.7's test suite and incorporates all the latest updates.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    91
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    92
try:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    93
    from thread import get_ident as _get_ident
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    94
except ImportError:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    95
    from dummy_thread import get_ident as _get_ident
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    96
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    97
try:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    98
    from _abcoll import KeysView, ValuesView, ItemsView
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
    99
except ImportError:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   100
    pass
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   101
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   102
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   103
class OrderedDict(dict):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   104
    'Dictionary that remembers insertion order'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   105
    # An inherited dict maps keys to values.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   106
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   107
    # The remaining methods are order-aware.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   108
    # Big-O running times for all methods are the same as for regular dictionaries.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   109
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   110
    # The internal self.__map dictionary maps keys to links in a doubly linked list.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   111
    # The circular doubly linked list starts and ends with a sentinel element.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   112
    # The sentinel element never gets deleted (this simplifies the algorithm).
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   113
    # Each link is stored as a list of length three:  [PREV, NEXT, KEY].
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   114
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   115
    def __init__(self, *args, **kwds):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   116
        '''Initialize an ordered dictionary.  Signature is the same as for
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   117
        regular dictionaries, but keyword arguments are not recommended
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   118
        because their insertion order is arbitrary.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   119
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   120
        '''
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   121
        if len(args) > 1:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   122
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   123
        try:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   124
            self.__root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   125
        except AttributeError:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   126
            self.__root = root = []                     # sentinel node
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   127
            root[:] = [root, root, None]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   128
            self.__map = {}
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   129
        self.__update(*args, **kwds)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   130
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   131
    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   132
        'od.__setitem__(i, y) <==> od[i]=y'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   133
        # Setting a new item creates a new link which goes at the end of the linked
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   134
        # list, and the inherited dictionary is updated with the new key/value pair.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   135
        if key not in self:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   136
            root = self.__root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   137
            last = root[0]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   138
            last[1] = root[0] = self.__map[key] = [last, root, key]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   139
        dict_setitem(self, key, value)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   140
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   141
    def __delitem__(self, key, dict_delitem=dict.__delitem__):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   142
        'od.__delitem__(y) <==> del od[y]'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   143
        # Deleting an existing item uses self.__map to find the link which is
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   144
        # then removed by updating the links in the predecessor and successor nodes.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   145
        dict_delitem(self, key)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   146
        link_prev, link_next, key = self.__map.pop(key)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   147
        link_prev[1] = link_next
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   148
        link_next[0] = link_prev
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   149
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   150
    def __iter__(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   151
        'od.__iter__() <==> iter(od)'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   152
        root = self.__root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   153
        curr = root[1]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   154
        while curr is not root:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   155
            yield curr[2]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   156
            curr = curr[1]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   157
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   158
    def __reversed__(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   159
        'od.__reversed__() <==> reversed(od)'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   160
        root = self.__root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   161
        curr = root[0]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   162
        while curr is not root:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   163
            yield curr[2]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   164
            curr = curr[0]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   165
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   166
    def clear(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   167
        'od.clear() -> None.  Remove all items from od.'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   168
        try:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   169
            for node in self.__map.itervalues():
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   170
                del node[:]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   171
            root = self.__root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   172
            root[:] = [root, root, None]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   173
            self.__map.clear()
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   174
        except AttributeError:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   175
            pass
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   176
        dict.clear(self)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   177
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   178
    def popitem(self, last=True):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   179
        '''od.popitem() -> (k, v), return and remove a (key, value) pair.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   180
        Pairs are returned in LIFO order if last is true or FIFO order if false.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   181
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   182
        '''
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   183
        if not self:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   184
            raise KeyError('dictionary is empty')
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   185
        root = self.__root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   186
        if last:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   187
            link = root[0]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   188
            link_prev = link[0]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   189
            link_prev[1] = root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   190
            root[0] = link_prev
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   191
        else:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   192
            link = root[1]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   193
            link_next = link[1]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   194
            root[1] = link_next
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   195
            link_next[0] = root
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   196
        key = link[2]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   197
        del self.__map[key]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   198
        value = dict.pop(self, key)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   199
        return key, value
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   200
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   201
    # -- the following methods do not depend on the internal structure --
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   202
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   203
    def keys(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   204
        'od.keys() -> list of keys in od'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   205
        return list(self)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   206
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   207
    def values(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   208
        'od.values() -> list of values in od'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   209
        return [self[key] for key in self]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   210
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   211
    def items(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   212
        'od.items() -> list of (key, value) pairs in od'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   213
        return [(key, self[key]) for key in self]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   214
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   215
    def iterkeys(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   216
        'od.iterkeys() -> an iterator over the keys in od'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   217
        return iter(self)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   218
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   219
    def itervalues(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   220
        'od.itervalues -> an iterator over the values in od'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   221
        for k in self:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   222
            yield self[k]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   223
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   224
    def iteritems(self):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   225
        'od.iteritems -> an iterator over the (key, value) items in od'
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   226
        for k in self:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   227
            yield (k, self[k])
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   228
49
55e5f8a878ae add the version context_processor and correct css to display it correctly
ymh <ymh.work@gmail.com>
parents: 46
diff changeset
   229
    def update(*args, **kwds): #@NoSelf
46
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   230
        '''od.update(E, **F) -> None.  Update od from dict/iterable E and F.
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   231
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   232
        If E is a dict instance, does:           for k in E: od[k] = E[k]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   233
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   234
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   235
        In either case, this is followed by:     for k, v in F.items(): od[k] = v
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   236
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   237
        '''
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   238
        if len(args) > 2:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   239
            raise TypeError('update() takes at most 2 positional '
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   240
                            'arguments (%d given)' % (len(args),))
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   241
        elif not args:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   242
            raise TypeError('update() takes at least 1 argument (0 given)')
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   243
        self = args[0]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   244
        # Make progressively weaker assumptions about "other"
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   245
        other = ()
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   246
        if len(args) == 2:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   247
            other = args[1]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   248
        if isinstance(other, dict):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   249
            for key in other:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   250
                self[key] = other[key]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   251
        elif hasattr(other, 'keys'):
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   252
            for key in other.keys():
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   253
                self[key] = other[key]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   254
        else:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   255
            for key, value in other:
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   256
                self[key] = value
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   257
        for key, value in kwds.items():
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   258
            self[key] = value
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   259
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   260
    # Class-private alias of update(), kept so that code referring to the
    # alias still reaches this implementation when a subclass overrides update.
    __update = update  # let subclasses override update without breaking __init__

    # Unique sentinel used as the default of pop(): it lets pop() tell
    # "no default supplied" apart from any real value, including None.
    __marker = object()
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   263
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   264
    def pop(self, key, default=__marker):
        '''od.pop(k[,d]) -> v, remove specified key and return the corresponding value.

        When the key is absent, d is returned if it was supplied; otherwise
        KeyError is raised.

        '''
        if key not in self:
            # The private sentinel distinguishes "no default given" from any
            # real default value, including None.
            if default is self.__marker:
                raise KeyError(key)
            return default
        value = self[key]
        del self[key]
        return value
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   276
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   277
    def setdefault(self, key, default=None):
        '''od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od.

        Returns the stored value when the key already exists; otherwise
        stores default under the key and returns default.

        '''
        if key not in self:
            self[key] = default
            return default
        return self[key]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   283
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   284
    def __repr__(self, _repr_running={}):
        '''od.__repr__() <==> repr(od)

        _repr_running is deliberately a shared mutable default: it records the
        (object id, _get_ident()) pairs whose repr is currently being built so
        that a self-referencing dictionary prints '...' instead of recursing
        forever. _get_ident() is presumably the current thread identifier.

        '''
        call_key = (id(self), _get_ident())
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            class_name = self.__class__.__name__
            if self:
                return '%s(%r)' % (class_name, self.items())
            return '%s()' % (class_name,)
        finally:
            # Always clear the guard, even if building the repr raised.
            del _repr_running[call_key]
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   296
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   297
    def __reduce__(self):
        '''Return state information for pickling.

        The result is (class, (items,)) where items is a list of [key, value]
        pairs in iteration order. A third element, a dict of extra instance
        attributes, is appended when the instance carries attributes that a
        freshly created OrderedDict does not have.

        '''
        pairs = [[key, self[key]] for key in self]
        # Attributes every new OrderedDict already carries are not part of
        # the pickled state.
        builtin_attrs = vars(OrderedDict())
        extra_state = dict(
            (name, value)
            for name, value in vars(self).items()
            if name not in builtin_attrs
        )
        if extra_state:
            return (self.__class__, (pairs,), extra_state)
        return self.__class__, (pairs,)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   306
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   307
    def copy(self):
        '''od.copy() -> a shallow copy of od.

        The copy is built by calling the instance's own class with the
        instance as its single argument.

        '''
        cls = self.__class__
        return cls(self)
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   310
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   311
    @classmethod
    def fromkeys(cls, iterable, value=None):
        '''OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).

        Keys are inserted one by one, in the order the iterable yields them.

        '''
        new_dict = cls()
        for item in iterable:
            new_dict[item] = value
        return new_dict
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   321
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   322
    def __eq__(self, other):
        '''od.__eq__(y) <==> od==y.

        Against another OrderedDict the comparison is order-sensitive (same
        length and equal items() sequences); against any other object it
        falls back to dict.__eq__, which ignores ordering.

        '''
        if not isinstance(other, OrderedDict):
            return dict.__eq__(self, other)
        if len(self) != len(other):
            return False
        return self.items() == other.items()
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   330
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   331
    def __ne__(self, other):
        'od.__ne__(y) <==> od!=y, defined as the negation of od==y'
        is_equal = self == other
        return not is_equal
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   333
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   334
    # -- the following methods are only used in Python 2.7 --
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   335
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   336
    def viewkeys(self):
        "od.viewkeys() -> a set-like view object over the keys of od"
        keys_view = KeysView(self)
        return keys_view
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   339
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   340
    def viewvalues(self):
        "od.viewvalues() -> a view object over the values of od"
        values_view = ValuesView(self)
        return values_view
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   343
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   344
    def viewitems(self):
        "od.viewitems() -> a set-like view object over the (key, value) pairs of od"
        items_view = ItemsView(self)
        return items_view
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   347
## end of http://code.activestate.com/recipes/576693/ }}}
3ad571e54608 get categories enhancement.
cavaliet
parents: 24
diff changeset
   348
443
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
   349
def remove_accents(lne):
    """Return *lne* as unicode text with combining marks removed.

    The value is converted with ``unicode()``, decomposed with NFKD
    normalization, and every character that ``unicodedata.combining``
    reports as a combining mark is dropped.
    """
    decomposed = unicodedata.normalize('NFKD', unicode(lne))
    kept_chars = (char for char in decomposed if not unicodedata.combining(char))
    return u"".join(kept_chars)
72
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 56
diff changeset
   352
443
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
   353
def normalize(lne):
    """Return *lne* without accents, lowercased, with ``œ`` spelled ``oe``.

    The ligature is replaced explicitly after ``remove_accents`` and
    lowercasing have been applied.
    """
    unaccented = remove_accents(lne)
    lowered = unaccented.lower()
    return lowered.replace(u"œ", u"oe")
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
   355
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
   356
def sanitize(line, separator='-', ascii_only=True):
    """Turn *line* into a lowercase ASCII slug.

    The text is transliterated to ASCII with ``unidecode``, lowercased and
    stripped of every character that is not ``a-z``, ``0-9``, whitespace or
    a separator character. Each run of whitespace/separator characters is
    then collapsed into a single *separator*, and leading/trailing separator
    characters are removed.

    :param line: text to sanitize; any falsy value yields ``''``.
    :param separator: string placed between words. It is regex-escaped, so
        any character (a letter, a digit, a backslash, ...) is taken
        literally.
    :param ascii_only: accepted for backward compatibility; it is currently
        ignored and transliteration is always applied.
    :return: the sanitized string.
    """
    if not line:
        return ''

    # Escape the separator before embedding it in a character class: a bare
    # backslash prefix would turn separators such as 'n', 'd' or 'w' into
    # the regex escapes \n, \d, \w instead of literal characters.
    separator_chars = re.escape(separator)

    # Transliterate non-ASCII characters
    line = unidecode.unidecode(line)
    # Remove all characters that are not the separator, a-z, 0-9, or whitespace
    line = re.sub(r'[^%sa-z0-9\s]+' % separator_chars, '', line.lower())
    # Replace all separator characters and whitespace by a single separator.
    # A callable replacement keeps backslashes in the separator from being
    # interpreted as group references.
    line = re.sub(r'[%s\s]+' % separator_chars, lambda _match: separator, line)

    return line.strip(separator)
27f71b0a772d next version of import_rdf
ymh <ymh.work@gmail.com>
parents: 266
diff changeset
   369
111
ceb381f5b0c7 query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents: 74
diff changeset
   370
113
0d2bfd84b989 improve cat and infobox extraction + export csv
ymh <ymh.work@gmail.com>
parents: 111
diff changeset
   371
def show_progress(current_line, total_line, label, width, writer=None):
    """Write a one-line textual progress bar and return the writer used.

    The line has the form ``[====    ]  50%  5/10 - label`` and ends with a
    carriage return so that the next call overwrites it; a newline is added
    once 100% is reached.

    :param current_line: number of items processed so far.
    :param total_line: total number of items. When it is 0 the job is
        reported as complete instead of raising ``ZeroDivisionError``.
    :param label: text shown after the counters, truncated to *width*
        characters.
    :param width: width (in characters) of both the bar and the label field.
    :param writer: object with ``write`` and ``flush``. When ``None``,
        ``sys.stdout`` is used, wrapped in an encoding writer if it
        advertises an encoding.
    :return: the writer, so callers can pass it back on the next call.
    """
    if writer is None:
        writer = sys.stdout
        if sys.stdout.encoding is not None:
            # Wrap the underlying byte stream when there is one (Python 3
            # text streams reject the bytes produced by a codecs writer);
            # Python 2's sys.stdout has no ``buffer`` and is wrapped directly.
            raw_stream = getattr(sys.stdout, 'buffer', sys.stdout)
            writer = codecs.getwriter(sys.stdout.encoding)(raw_stream)

    total = float(total_line)
    if total:
        percent = (float(current_line) / total) * 100.0
    else:
        # Nothing to process: report completion rather than dividing by zero.
        percent = 100.0

    # Clamp the bar so that it never grows past ``width`` when
    # current_line exceeds total_line.
    marks = int(math.floor(width * (percent / 100.0)))
    marks = max(0, min(width, marks))
    spaces = width - marks

    loader = u'[' + (u'=' * marks) + (u' ' * spaces) + u']'

    s = u"%s %3d%% %*d/%d - %*s\r" % (
        loader, percent, len(str(total_line)), current_line, total_line,
        width, label[:width])

    writer.write(s)
    if percent >= 100:
        writer.write(u"\n")
    writer.flush()

    return writer
111
ceb381f5b0c7 query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents: 74
diff changeset
   393