| author | cavaliet |
| Tue, 17 Jun 2014 10:25:33 +0200 | |
| changeset 271 | 8f77cf71ab02 |
| parent 114 | web/hdabo/utils.py@c59383cc9940 |
| child 443 | 27f71b0a772d |
| permissions | -rw-r--r-- |
| 24 | 1 |
# -*- coding: utf-8 -*- |
|
11
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
2 |
import collections |
| 56 | 3 |
import unicodedata |
|
111
ceb381f5b0c7
query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents:
74
diff
changeset
|
4 |
import sys |
|
ceb381f5b0c7
query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents:
74
diff
changeset
|
5 |
import math |
|
113
0d2bfd84b989
improve cat and infobox extraction + export csv
ymh <ymh.work@gmail.com>
parents:
111
diff
changeset
|
6 |
import codecs |
| 0 | 7 |
|
8 |
### |
|
9 |
# allow to declare a property as a decorator |
|
10 |
### |
|
11 |
def Property(func):
    """Turn a function returning accessor definitions into a ``property``.

    Meant to be applied as a decorator: ``func`` is called once, without
    arguments, and the mapping it returns is unpacked as the keyword
    arguments of the built-in ``property`` (``fget``, ``fset``, ``fdel``,
    ``doc``).

    :param func: zero-argument callable returning a mapping of ``property``
        keyword arguments.
    :return: the resulting ``property`` object.
    """
    accessors = func()
    return property(**accessors)
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
13 |
|
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
14 |
|
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
15 |
## {{{ http://code.activestate.com/recipes/576694/ (r7) |
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
16 |
|
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
17 |
# Index of each field inside a linked-list node ``[key, prev, next]``.
KEY, PREV, NEXT = range(3)

try:
    from collections.abc import Iterable as _Iterable, MutableSet as _MutableSet
except ImportError:  # Python 2: the ABCs are exposed directly by ``collections``
    from collections import Iterable as _Iterable, MutableSet as _MutableSet


class OrderedSet(_MutableSet):
    """Set that iterates over its elements in insertion order.

    ``self.map`` maps every key to its node ``[key, prev, next]``; the nodes
    form a circular doubly linked list closed by the sentinel ``self.end``.
    """

    def __init__(self, iterable=None):
        """Create the set, optionally filled with the items of ``iterable``."""
        self.end = end = []
        end += [None, end, end]  # sentinel node for doubly linked list
        self.map = {}  # key --> [key, prev, next]
        if iterable is not None:
            self |= iterable

    def __len__(self):
        return len(self.map)

    def __contains__(self, key):
        return key in self.map

    def add(self, key):
        """Append ``key`` at the end of the set; no-op if already present."""
        if key not in self.map:
            end = self.end
            curr = end[PREV]
            # Link the new node between the current last node and the sentinel.
            curr[NEXT] = end[PREV] = self.map[key] = [key, curr, end]

    def discard(self, key):
        """Remove ``key`` if present; silently do nothing otherwise."""
        if key in self.map:
            # Locals are not called ``next``/``prev`` so the builtin ``next``
            # is never shadowed.
            key, prev_node, next_node = self.map.pop(key)
            prev_node[NEXT] = next_node
            next_node[PREV] = prev_node

    def __iter__(self):
        end = self.end
        curr = end[NEXT]
        while curr is not end:
            yield curr[KEY]
            curr = curr[NEXT]

    def __reversed__(self):
        end = self.end
        curr = end[PREV]
        while curr is not end:
            yield curr[KEY]
            curr = curr[PREV]

    def pop(self, last=True):
        """Remove and return the last element (the first if ``last`` is false).

        :raises KeyError: if the set is empty.
        """
        if not self:
            raise KeyError('set is empty')
        key = next(reversed(self)) if last else next(iter(self))
        self.discard(key)
        return key

    def __repr__(self):
        if not self:
            return '%s()' % (self.__class__.__name__,)
        return '%s(%r)' % (self.__class__.__name__, list(self))

    def __eq__(self, other):
        """Compare with ``other``.

        Order-sensitive against another ``OrderedSet``, order-insensitive
        against any other iterable.  A non-iterable operand yields
        ``NotImplemented`` (so ``==`` evaluates to ``False``) instead of
        raising ``TypeError`` from ``set(other)``.
        """
        if isinstance(other, OrderedSet):
            return len(self) == len(other) and list(self) == list(other)
        if not isinstance(other, _Iterable):
            return NotImplemented
        return set(self) == set(other)

    def __del__(self):
        # Empty the set on finalization so the element nodes stop
        # referencing each other.
        self.clear()  # remove circular references
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
79 |
|
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
80 |
|
|
143ab88d17f8
add ordered manytomany fields and indexing
ymh <ymh.work@gmail.com>
parents:
0
diff
changeset
|
81 |
## end of http://code.activestate.com/recipes/576694/ }}} |
| 24 | 82 |
|
| 46 | 83 |
|
84 |
## {{{ http://code.activestate.com/recipes/576693/ (r9) |
|
85 |
# Backport of OrderedDict() class that runs on Python 2.4, 2.5, 2.6, 2.7 and pypy. |
|
86 |
# Passes Python2.7's test suite and incorporates all the latest updates. |
|
87 |
||
88 |
try:
    from thread import get_ident as _get_ident  # Python 2
except ImportError:
    try:
        from threading import get_ident as _get_ident  # Python 3.3+
    except ImportError:
        # Python 2 interpreter without the ``thread`` module.
        from dummy_thread import get_ident as _get_ident

try:
    from _abcoll import KeysView, ValuesView, ItemsView  # Python 2.6 / 2.7
except ImportError:
    try:
        from collections.abc import KeysView, ValuesView, ItemsView  # Python 3
    except ImportError:
        # No mapping-view classes on this interpreter: viewkeys(),
        # viewvalues() and viewitems() cannot be used there.
        pass


class OrderedDict(dict):
    """Dictionary that remembers insertion order."""
    # An inherited dict maps keys to values.
    # The inherited dict provides __getitem__, __len__, __contains__, and get.
    # The remaining methods are order-aware.
    # Big-O running times for all methods are the same as for regular dictionaries.

    # The internal self.__map dictionary maps keys to links in a doubly linked list.
    # The circular doubly linked list starts and ends with a sentinel element.
    # The sentinel element never gets deleted (this simplifies the algorithm).
    # Each link is stored as a list of length three: [PREV, NEXT, KEY].

    def __init__(self, *args, **kwds):
        """Initialize an ordered dictionary.

        Signature is the same as for regular dictionaries, but keyword
        arguments are not recommended because their insertion order is
        arbitrary.

        :raises TypeError: if more than one positional argument is given.
        """
        if len(args) > 1:
            raise TypeError('expected at most 1 arguments, got %d' % len(args))
        try:
            self.__root
        except AttributeError:
            # First initialisation: build the empty circular list.
            self.__root = root = []  # sentinel node
            root[:] = [root, root, None]
            self.__map = {}
        self.__update(*args, **kwds)

    def __setitem__(self, key, value, dict_setitem=dict.__setitem__):
        """od.__setitem__(i, y) <==> od[i]=y"""
        # Setting a new item creates a new link which goes at the end of the linked
        # list, and the inherited dictionary is updated with the new key/value pair.
        if key not in self:
            root = self.__root
            last = root[0]
            last[1] = root[0] = self.__map[key] = [last, root, key]
        dict_setitem(self, key, value)

    def __delitem__(self, key, dict_delitem=dict.__delitem__):
        """od.__delitem__(y) <==> del od[y]"""
        # Deleting an existing item uses self.__map to find the link which is
        # then removed by updating the links in the predecessor and successor nodes.
        dict_delitem(self, key)
        link_prev, link_next, key = self.__map.pop(key)
        link_prev[1] = link_next
        link_next[0] = link_prev

    def __iter__(self):
        """od.__iter__() <==> iter(od)"""
        root = self.__root
        curr = root[1]
        while curr is not root:
            yield curr[2]
            curr = curr[1]

    def __reversed__(self):
        """od.__reversed__() <==> reversed(od)"""
        root = self.__root
        curr = root[0]
        while curr is not root:
            yield curr[2]
            curr = curr[0]

    def clear(self):
        """od.clear() -> None.  Remove all items from od."""
        try:
            links = self.__map
            root = self.__root
        except AttributeError:
            # __init__ has not run yet: there is no linked list to reset.
            pass
        else:
            # Only the attribute lookups above are guarded, so an unrelated
            # AttributeError can no longer skip the reset and leave stale
            # links behind an emptied dict.
            for node in links.values():
                del node[:]
            root[:] = [root, root, None]
            links.clear()
        dict.clear(self)

    def popitem(self, last=True):
        """od.popitem() -> (k, v), return and remove a (key, value) pair.

        Pairs are returned in LIFO order if last is true or FIFO order if false.

        :raises KeyError: if the dictionary is empty.
        """
        if not self:
            raise KeyError('dictionary is empty')
        root = self.__root
        if last:
            link = root[0]
            link_prev = link[0]
            link_prev[1] = root
            root[0] = link_prev
        else:
            link = root[1]
            link_next = link[1]
            root[1] = link_next
            link_next[0] = root
        key = link[2]
        del self.__map[key]
        value = dict.pop(self, key)
        return key, value

    # -- the following methods do not depend on the internal structure --

    def keys(self):
        """od.keys() -> list of keys in od"""
        return list(self)

    def values(self):
        """od.values() -> list of values in od"""
        return [self[key] for key in self]

    def items(self):
        """od.items() -> list of (key, value) pairs in od"""
        return [(key, self[key]) for key in self]

    def iterkeys(self):
        """od.iterkeys() -> an iterator over the keys in od"""
        return iter(self)

    def itervalues(self):
        """od.itervalues -> an iterator over the values in od"""
        for k in self:
            yield self[k]

    def iteritems(self):
        """od.iteritems -> an iterator over the (key, value) items in od"""
        for k in self:
            yield (k, self[k])

    def update(*args, **kwds):  #@NoSelf
        """od.update(E, **F) -> None.  Update od from dict/iterable E and F.

        If E is a dict instance, does:           for k in E: od[k] = E[k]
        If E has a .keys() method, does:         for k in E.keys(): od[k] = E[k]
        Or if E is an iterable of items, does:   for k, v in E: od[k] = v
        In either case, this is followed by:     for k, v in F.items(): od[k] = v

        ``self`` is taken from ``args`` rather than declared, so that a
        keyword argument named ``self`` can still be stored as a key.
        """
        if len(args) > 2:
            raise TypeError('update() takes at most 2 positional '
                            'arguments (%d given)' % (len(args),))
        elif not args:
            raise TypeError('update() takes at least 1 argument (0 given)')
        self = args[0]
        # Make progressively weaker assumptions about "other"
        other = ()
        if len(args) == 2:
            other = args[1]
        if isinstance(other, dict):
            for key in other:
                self[key] = other[key]
        elif hasattr(other, 'keys'):
            for key in other.keys():
                self[key] = other[key]
        else:
            for key, value in other:
                self[key] = value
        for key, value in kwds.items():
            self[key] = value

    __update = update  # let subclasses override update without breaking __init__

    __marker = object()

    def pop(self, key, default=__marker):
        """od.pop(k[,d]) -> v, remove specified key and return the corresponding value.

        If key is not found, d is returned if given, otherwise KeyError is raised.
        """
        if key in self:
            result = self[key]
            del self[key]
            return result
        if default is self.__marker:
            raise KeyError(key)
        return default

    def setdefault(self, key, default=None):
        """od.setdefault(k[,d]) -> od.get(k,d), also set od[k]=d if k not in od"""
        if key in self:
            return self[key]
        self[key] = default
        return default

    def __repr__(self, _repr_running={}):
        """od.__repr__() <==> repr(od)"""
        # ``_repr_running`` is deliberately a shared default: it records the
        # (object id, thread id) pairs being rendered so that a dictionary
        # containing itself prints '...' instead of recursing.
        call_key = id(self), _get_ident()
        if call_key in _repr_running:
            return '...'
        _repr_running[call_key] = 1
        try:
            if not self:
                return '%s()' % (self.__class__.__name__,)
            return '%s(%r)' % (self.__class__.__name__, self.items())
        finally:
            del _repr_running[call_key]

    def __reduce__(self):
        """Return state information for pickling"""
        items = [[k, self[k]] for k in self]
        inst_dict = vars(self).copy()
        # Drop the attributes every fresh instance has (the linked-list
        # internals); they are rebuilt from ``items`` on unpickling.
        for k in vars(OrderedDict()):
            inst_dict.pop(k, None)
        if inst_dict:
            return (self.__class__, (items,), inst_dict)
        return self.__class__, (items,)

    def copy(self):
        """od.copy() -> a shallow copy of od"""
        return self.__class__(self)

    @classmethod
    def fromkeys(cls, iterable, value=None):
        """OD.fromkeys(S[, v]) -> New ordered dictionary with keys from S
        and values equal to v (which defaults to None).
        """
        d = cls()
        for key in iterable:
            d[key] = value
        return d

    def __eq__(self, other):
        """od.__eq__(y) <==> od==y.  Comparison to another OD is order-sensitive
        while comparison to a regular mapping is order-insensitive.
        """
        if isinstance(other, OrderedDict):
            return len(self) == len(other) and self.items() == other.items()
        return dict.__eq__(self, other)

    def __ne__(self, other):
        return not self == other

    # -- the following methods are only used in Python 2.7 --

    def viewkeys(self):
        """od.viewkeys() -> a set-like object providing a view on od's keys"""
        return KeysView(self)

    def viewvalues(self):
        """od.viewvalues() -> an object providing a view on od's values"""
        return ValuesView(self)

    def viewitems(self):
        """od.viewitems() -> a set-like object providing a view on od's items"""
        return ItemsView(self)
|
343 |
## end of http://code.activestate.com/recipes/576693/ }}} |
|
344 |
||
| 56 | 345 |
def remove_accents(str):
    """Return ``str`` as text with every combining mark removed.

    The value is converted to text, decomposed with Unicode NFKD
    normalization, and the characters that ``unicodedata.combining``
    reports as combining marks are dropped.

    :param str: value to strip; it is converted to text first.
    :return: the unaccented text.
    """
    # ``type(u"")`` is ``unicode`` on Python 2 and ``str`` on Python 3; the
    # builtin ``str`` cannot be used here because the parameter shadows it.
    text_type = type(u"")
    nkfd_form = unicodedata.normalize('NFKD', text_type(str))
    return u"".join(c for c in nkfd_form if not unicodedata.combining(c))
|
| 72 | 348 |
|
349 |
def normalize(str):
    """Return an accent-free, lower-case form of ``str``.

    Accents are stripped with ``remove_accents``, the result is lower-cased,
    and the ligature "œ" is then spelled out as "oe".
    """
    unaccented = remove_accents(str)
    return unaccented.lower().replace(u"œ", u"oe")
|
111
ceb381f5b0c7
query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents:
74
diff
changeset
|
351 |
|
|
113
0d2bfd84b989
improve cat and infobox extraction + export csv
ymh <ymh.work@gmail.com>
parents:
111
diff
changeset
|
352 |
def show_progress(current_line, total_line, label, width, writer=None):
    """Write a one-line text progress bar and return the writer used.

    The line ends with a carriage return so the next call overwrites it; a
    newline is added once the progress reaches 100%.

    :param current_line: number of items processed so far.
    :param total_line: total number of items; ``0`` is reported as complete.
    :param label: text shown after the counters, cut and padded to ``width``.
    :param width: number of characters inside the bar (an integer).
    :param writer: object with ``write`` and ``flush``; defaults to
        ``sys.stdout``.
    :return: the writer, so that it can be passed to the next call.
    """
    if writer is None:
        writer = sys.stdout
        encoding = getattr(sys.stdout, 'encoding', None)
        # Only the Python 2 byte-oriented stdout needs an encoding wrapper; a
        # Python 3 text stream does not accept the bytes the wrapper produces.
        if encoding is not None and sys.version_info[0] < 3:
            writer = codecs.getwriter(encoding)(sys.stdout)

    if total_line:
        percent = (float(current_line) / float(total_line)) * 100.0
    else:
        # Nothing to process: report completion instead of dividing by zero.
        percent = 100.0

    # Keep the bar exactly ``width`` characters long even when current_line
    # is negative or exceeds total_line.
    marks = int(math.floor(width * (percent / 100.0)))
    marks = max(0, min(width, marks))
    spaces = width - marks

    loader = u'[' + (u'=' * marks) + (u' ' * spaces) + u']'

    s = u"%s %3d%% %*d/%d - %*s\r" % (loader, percent, len(str(total_line)), current_line, total_line, width, label[:width])

    writer.write(s)  # takes the header into account
    if percent >= 100:
        writer.write("\n")
    writer.flush()

    return writer
|
111
ceb381f5b0c7
query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents:
74
diff
changeset
|
374 |