|
1 """ |
|
2 Tagging utilities - from user tag input parsing to tag cloud |
|
3 calculation. |
|
4 """ |
|
5 import math |
|
6 import types |
|
7 |
|
8 from django.db.models.query import QuerySet |
|
9 from django.utils.encoding import force_unicode |
|
10 from django.utils.translation import ugettext as _ |
|
11 |
|
12 # Python 2.3 compatibility |
|
13 try: |
|
14 set |
|
15 except NameError: |
|
16 from sets import Set as set |
|
17 |
|
def parse_tag_input(input):
    """
    Turns a user-supplied tag string into a sorted list of unique tag
    names.

    Multi-word tags are enabled by using commas or double quotes:
    text between a pair of double quotes is always taken as a single
    tag (so it may contain commas), and if any comma appears outside
    of quotes the unquoted text is split on commas rather than on
    spaces.

    Empty input yields an empty list.
    """
    if not input:
        return []

    input = force_unicode(input)

    # Fast path: with neither commas nor double quotes present, the
    # input can only be a space-separated list of single-word tags.
    if u',' not in input and u'"' not in input:
        simple = list(set(split_strip(input, u' ')))
        simple.sort()
        return simple

    tags = []
    pending = []
    # Unquoted text is only split once the whole input has been seen,
    # because the delimiter depends on whether any unquoted comma
    # exists anywhere in the input.
    unquoted_chunks = []
    comma_outside_quotes = False
    inside_quote = False
    chars = iter(input)
    try:
        while True:
            ch = chars.next()
            if ch != u'"':
                # Ordinary character - accumulate it, noting commas.
                if ch == u',':
                    comma_outside_quotes = True
                pending.append(ch)
                continue

            # Opening quote: whatever was accumulated so far is
            # unquoted text which gets split later.
            if pending:
                unquoted_chunks.append(u''.join(pending))
                pending = []
            # Consume characters up to the matching closing quote.
            inside_quote = True
            ch = chars.next()
            while ch != u'"':
                pending.append(ch)
                ch = chars.next()
            if pending:
                quoted = u''.join(pending).strip()
                if quoted:
                    tags.append(quoted)
                pending = []
            inside_quote = False
    except StopIteration:
        # End of input. Text following a quote which was never closed
        # is handled as if it had not been quoted at all.
        if pending:
            if inside_quote and u',' in pending:
                comma_outside_quotes = True
            unquoted_chunks.append(u''.join(pending))

    if unquoted_chunks:
        delimiter = u' '
        if comma_outside_quotes:
            delimiter = u','
        for chunk in unquoted_chunks:
            tags.extend(split_strip(chunk, delimiter))

    tags = list(set(tags))
    tags.sort()
    return tags
|
87 |
|
def split_strip(input, delimiter=u','):
    """
    Breaks ``input`` apart at each ``delimiter`` and returns the
    pieces, in order, with surrounding whitespace removed. Pieces
    which are empty after stripping are left out.

    A false ``input`` (empty string, ``None``) gives an empty list.
    """
    if not input:
        return []

    pieces = []
    for piece in input.split(delimiter):
        piece = piece.strip()
        if piece:
            pieces.append(piece)
    return pieces
|
98 |
|
def edit_string_for_tags(tags):
    """
    Given list of ``Tag`` instances, creates a string representation of
    the list suitable for editing by the user, such that submitting the
    given string representation back without changing it will give the
    same list of tags.

    Only the ``name`` attribute of each item in ``tags`` is read.

    Tag names which contain commas will be double quoted.

    If any tag name which isn't being quoted contains a space, the
    resulting string of tag names will be comma-delimited, otherwise
    it will be space-delimited.

    A single tag whose name contains a space (and no comma) is double
    quoted: with only one name there is no delimiter in the output, so
    left unquoted it would be read back as several one-word tags.
    """
    names = []
    use_commas = False
    for tag in tags:
        name = tag.name
        if u',' in name:
            # Quotes take precedence over commas when the string is
            # parsed, so quoting protects the embedded comma.
            names.append('"%s"' % name)
            continue
        if u' ' in name:
            use_commas = True
        names.append(name)

    if use_commas and len(names) == 1:
        # The only name is an unquoted multi-word name; joining a single
        # item emits no comma, so quote it to keep it in one piece.
        return '"%s"' % names[0]

    if use_commas:
        glue = u', '
    else:
        glue = u' '
    return glue.join(names)
|
128 |
|
def get_queryset_and_model(queryset_or_model):
    """
    Normalises the argument to a two-tuple of (queryset, model).

    An object with a ``model`` attribute is treated as a ``QuerySet``
    and returned unchanged alongside that attribute. Anything else is
    treated as a ``Model``; its queryset is obtained by calling
    ``all()`` on its ``_default_manager``.
    """
    try:
        model = queryset_or_model.model
    except AttributeError:
        # No ``model`` attribute, so this must be the model itself.
        model = queryset_or_model
        queryset = model._default_manager.all()
    else:
        queryset = queryset_or_model
    return queryset, model
|
141 |
|
def get_tag_list(tags):
    """
    Accepts tag input in a number of forms and returns a collection of
    tags.

    A single ``Tag`` object is returned wrapped in a list.

    The following are returned as-is:

       * A ``Tag`` ``QuerySet``.
       * A list or tuple of ``Tag`` objects.
       * An empty list or tuple.

    The following are used to build a ``Tag`` ``QuerySet``:

       * A string, which may contain multiple tag names.
       * A list or tuple of strings corresponding to tag names.
       * A list or tuple of integers corresponding to tag ids.

    A ``ValueError`` is raised for any other kind of input, and for a
    list or tuple in which the recognised item kinds (strings, ``Tag``
    objects, integers) are mixed or absent.
    """
    from tagging.models import Tag

    if isinstance(tags, Tag):
        return [tags]
    if isinstance(tags, QuerySet) and tags.model is Tag:
        return tags
    if isinstance(tags, types.StringTypes):
        return Tag.objects.filter(name__in=parse_tag_input(tags))
    if not isinstance(tags, (types.ListType, types.TupleType)):
        raise ValueError(_('The tag input given was invalid.'))

    if len(tags) == 0:
        return tags

    # Record which recognised kinds of item the sequence contains;
    # items of any other type are not recorded.
    kinds = set()
    for item in tags:
        if isinstance(item, types.StringTypes):
            kinds.add('string')
        elif isinstance(item, Tag):
            kinds.add('tag')
        elif isinstance(item, (types.IntType, types.LongType)):
            kinds.add('int')

    if len(kinds) != 1:
        raise ValueError(_('If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.'))

    if 'string' in kinds:
        tag_names = [force_unicode(tag) for tag in tags]
        return Tag.objects.filter(name__in=tag_names)
    if 'tag' in kinds:
        return tags
    # Exactly one kind was recorded and it was neither 'string' nor
    # 'tag', so the items are ids.
    return Tag.objects.filter(id__in=tags)
|
192 |
|
def get_tag(tag):
    """
    Accepts single tag input in a number of forms.

    A ``Tag`` object is handed back untouched. A string is looked up
    as a tag name and an integer as a tag id.

    ``None`` is returned when the lookup finds no matching tag, and
    also when the input is of any other type.
    """
    from tagging.models import Tag

    if isinstance(tag, Tag):
        return tag

    # Work out which field to query on before touching the database.
    if isinstance(tag, types.StringTypes):
        lookup = {'name': tag}
    elif isinstance(tag, (types.IntType, types.LongType)):
        lookup = {'id': tag}
    else:
        return None

    try:
        return Tag.objects.get(**lookup)
    except Tag.DoesNotExist:
        return None
|
217 |
|
# Font size distribution algorithms
LOGARITHMIC, LINEAR = 1, 2

def _calculate_thresholds(min_weight, max_weight, steps):
    """
    Returns a list of ``steps`` evenly spaced upper bounds which divide
    the range from ``min_weight`` to ``max_weight``.

    The final bound is nominally ``max_weight``, but as it is computed
    with floating point arithmetic it may differ from it by a rounding
    error. A ``steps`` of zero results in a ``ZeroDivisionError``.
    """
    delta = (max_weight - min_weight) / float(steps)
    return [min_weight + i * delta for i in range(1, steps + 1)]

def _calculate_tag_weight(weight, max_weight, distribution):
    """
    Returns the weight to be compared against the thresholds for a tag
    used ``weight`` times, where ``max_weight`` is the highest usage
    count in the cloud.

    With the ``LINEAR`` distribution, or whenever ``max_weight`` is 1
    (whose logarithm is zero and could not be divided by), the weight
    is returned unchanged. Any other unrecognised ``distribution``
    raises a ``ValueError``.

    Logarithmic tag weight calculation is based on code from the
    `Tag Cloud`_ plugin for Mephisto, by Sven Fuchs.

    .. _`Tag Cloud`: http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
    """
    if distribution == LINEAR or max_weight == 1:
        return weight
    elif distribution == LOGARITHMIC:
        return math.log(weight) * max_weight / math.log(max_weight)
    raise ValueError(_('Invalid distribution algorithm specified: %s.') % distribution)

def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
    """
    Add a ``font_size`` attribute to each tag according to the
    frequency of its use, as indicated by its ``count``
    attribute.

    ``steps`` defines the range of font sizes - ``font_size`` will
    be an integer between 1 and ``steps`` (inclusive).

    ``distribution`` defines the type of font size distribution
    algorithm which will be used - logarithmic or linear. It must be
    one of ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``;
    other values may raise a ``ValueError``.

    The given ``tags`` are modified in place and also returned. Empty
    input is returned untouched.
    """
    if len(tags) > 0:
        counts = [tag.count for tag in tags]
        min_weight = float(min(counts))
        max_weight = float(max(counts))
        thresholds = _calculate_thresholds(min_weight, max_weight, steps)
        for tag in tags:
            tag_weight = _calculate_tag_weight(tag.count, max_weight, distribution)
            # Start from the largest size: floating point rounding can
            # leave the top threshold marginally below max_weight (or
            # the computed weight marginally above it), in which case
            # no threshold matches and the tag belongs in the top step.
            tag.font_size = steps
            for i in range(steps):
                if tag_weight <= thresholds[i]:
                    # The first threshold reached decides the size.
                    tag.font_size = i + 1
                    break
    return tags