web/lib/tagging/utils.py
changeset 11 f236caaceb43
equal deleted inserted replaced
10:7207a5a27b8f 11:f236caaceb43
       
     1 """
       
     2 Tagging utilities - from user tag input parsing to tag cloud
       
     3 calculation.
       
     4 """
       
     5 import math
       
     6 import types
       
     7 
       
     8 from django.db.models.query import QuerySet
       
     9 from django.utils.encoding import force_unicode
       
    10 from django.utils.translation import ugettext as _
       
    11 
       
    12 # Python 2.3 compatibility
       
    13 try:
       
    14     set
       
    15 except NameError:
       
    16     from sets import Set as set
       
    17 
       
    18 def parse_tag_input(input):
       
    19     """
       
    20     Parses tag input, with multiple word input being activated and
       
    21     delineated by commas and double quotes. Quotes take precedence, so
       
    22     they may contain commas.
       
    23 
       
    24     Returns a sorted list of unique tag names.
       
    25     """
       
    26     if not input:
       
    27         return []
       
    28 
       
    29     input = force_unicode(input)
       
    30 
       
    31     # Special case - if there are no commas or double quotes in the
       
    32     # input, we don't *do* a recall... I mean, we know we only need to
       
    33     # split on spaces.
       
    34     if u',' not in input and u'"' not in input:
       
    35         words = list(set(split_strip(input, u' ')))
       
    36         words.sort()
       
    37         return words
       
    38 
       
    39     words = []
       
    40     buffer = []
       
    41     # Defer splitting of non-quoted sections until we know if there are
       
    42     # any unquoted commas.
       
    43     to_be_split = []
       
    44     saw_loose_comma = False
       
    45     open_quote = False
       
    46     i = iter(input)
       
    47     try:
       
    48         while 1:
       
    49             c = i.next()
       
    50             if c == u'"':
       
    51                 if buffer:
       
    52                     to_be_split.append(u''.join(buffer))
       
    53                     buffer = []
       
    54                 # Find the matching quote
       
    55                 open_quote = True
       
    56                 c = i.next()
       
    57                 while c != u'"':
       
    58                     buffer.append(c)
       
    59                     c = i.next()
       
    60                 if buffer:
       
    61                     word = u''.join(buffer).strip()
       
    62                     if word:
       
    63                         words.append(word)
       
    64                     buffer = []
       
    65                 open_quote = False
       
    66             else:
       
    67                 if not saw_loose_comma and c == u',':
       
    68                     saw_loose_comma = True
       
    69                 buffer.append(c)
       
    70     except StopIteration:
       
    71         # If we were parsing an open quote which was never closed treat
       
    72         # the buffer as unquoted.
       
    73         if buffer:
       
    74             if open_quote and u',' in buffer:
       
    75                 saw_loose_comma = True
       
    76             to_be_split.append(u''.join(buffer))
       
    77     if to_be_split:
       
    78         if saw_loose_comma:
       
    79             delimiter = u','
       
    80         else:
       
    81             delimiter = u' '
       
    82         for chunk in to_be_split:
       
    83             words.extend(split_strip(chunk, delimiter))
       
    84     words = list(set(words))
       
    85     words.sort()
       
    86     return words
       
    87 
       
    88 def split_strip(input, delimiter=u','):
       
    89     """
       
    90     Splits ``input`` on ``delimiter``, stripping each resulting string
       
    91     and returning a list of non-empty strings.
       
    92     """
       
    93     if not input:
       
    94         return []
       
    95 
       
    96     words = [w.strip() for w in input.split(delimiter)]
       
    97     return [w for w in words if w]
       
    98 
       
    99 def edit_string_for_tags(tags):
       
   100     """
       
   101     Given list of ``Tag`` instances, creates a string representation of
       
   102     the list suitable for editing by the user, such that submitting the
       
   103     given string representation back without changing it will give the
       
   104     same list of tags.
       
   105 
       
   106     Tag names which contain commas will be double quoted.
       
   107 
       
   108     If any tag name which isn't being quoted contains whitespace, the
       
   109     resulting string of tag names will be comma-delimited, otherwise
       
   110     it will be space-delimited.
       
   111     """
       
   112     names = []
       
   113     use_commas = False
       
   114     for tag in tags:
       
   115         name = tag.name
       
   116         if u',' in name:
       
   117             names.append('"%s"' % name)
       
   118             continue
       
   119         elif u' ' in name:
       
   120             if not use_commas:
       
   121                 use_commas = True
       
   122         names.append(name)
       
   123     if use_commas:
       
   124         glue = u', '
       
   125     else:
       
   126         glue = u' '
       
   127     return glue.join(names)
       
   128 
       
   129 def get_queryset_and_model(queryset_or_model):
       
   130     """
       
   131     Given a ``QuerySet`` or a ``Model``, returns a two-tuple of
       
   132     (queryset, model).
       
   133 
       
   134     If a ``Model`` is given, the ``QuerySet`` returned will be created
       
   135     using its default manager.
       
   136     """
       
   137     try:
       
   138         return queryset_or_model, queryset_or_model.model
       
   139     except AttributeError:
       
   140         return queryset_or_model._default_manager.all(), queryset_or_model
       
   141 
       
   142 def get_tag_list(tags):
       
   143     """
       
   144     Utility function for accepting tag input in a flexible manner.
       
   145 
       
   146     If a ``Tag`` object is given, it will be returned in a list as
       
   147     its single occupant.
       
   148 
       
   149     If given, the tag names in the following will be used to create a
       
   150     ``Tag`` ``QuerySet``:
       
   151 
       
   152        * A string, which may contain multiple tag names.
       
   153        * A list or tuple of strings corresponding to tag names.
       
   154        * A list or tuple of integers corresponding to tag ids.
       
   155 
       
   156     If given, the following will be returned as-is:
       
   157 
       
   158        * A list or tuple of ``Tag`` objects.
       
   159        * A ``Tag`` ``QuerySet``.
       
   160 
       
   161     """
       
   162     from tagging.models import Tag
       
   163     if isinstance(tags, Tag):
       
   164         return [tags]
       
   165     elif isinstance(tags, QuerySet) and tags.model is Tag:
       
   166         return tags
       
   167     elif isinstance(tags, types.StringTypes):
       
   168         return Tag.objects.filter(name__in=parse_tag_input(tags))
       
   169     elif isinstance(tags, (types.ListType, types.TupleType)):
       
   170         if len(tags) == 0:
       
   171             return tags
       
   172         contents = set()
       
   173         for item in tags:
       
   174             if isinstance(item, types.StringTypes):
       
   175                 contents.add('string')
       
   176             elif isinstance(item, Tag):
       
   177                 contents.add('tag')
       
   178             elif isinstance(item, (types.IntType, types.LongType)):
       
   179                 contents.add('int')
       
   180         if len(contents) == 1:
       
   181             if 'string' in contents:
       
   182                 return Tag.objects.filter(name__in=[force_unicode(tag) \
       
   183                                                     for tag in tags])
       
   184             elif 'tag' in contents:
       
   185                 return tags
       
   186             elif 'int' in contents:
       
   187                 return Tag.objects.filter(id__in=tags)
       
   188         else:
       
   189             raise ValueError(_('If a list or tuple of tags is provided, they must all be tag names, Tag objects or Tag ids.'))
       
   190     else:
       
   191         raise ValueError(_('The tag input given was invalid.'))
       
   192 
       
   193 def get_tag(tag):
       
   194     """
       
   195     Utility function for accepting single tag input in a flexible
       
   196     manner.
       
   197 
       
   198     If a ``Tag`` object is given it will be returned as-is; if a
       
   199     string or integer are given, they will be used to lookup the
       
   200     appropriate ``Tag``.
       
   201 
       
   202     If no matching tag can be found, ``None`` will be returned.
       
   203     """
       
   204     from tagging.models import Tag
       
   205     if isinstance(tag, Tag):
       
   206         return tag
       
   207 
       
   208     try:
       
   209         if isinstance(tag, types.StringTypes):
       
   210             return Tag.objects.get(name=tag)
       
   211         elif isinstance(tag, (types.IntType, types.LongType)):
       
   212             return Tag.objects.get(id=tag)
       
   213     except Tag.DoesNotExist:
       
   214         pass
       
   215 
       
   216     return None
       
   217 
       
   218 # Font size distribution algorithms
       
   219 LOGARITHMIC, LINEAR = 1, 2
       
   220 
       
   221 def _calculate_thresholds(min_weight, max_weight, steps):
       
   222     delta = (max_weight - min_weight) / float(steps)
       
   223     return [min_weight + i * delta for i in range(1, steps + 1)]
       
   224 
       
   225 def _calculate_tag_weight(weight, max_weight, distribution):
       
   226     """
       
   227     Logarithmic tag weight calculation is based on code from the
       
   228     `Tag Cloud`_ plugin for Mephisto, by Sven Fuchs.
       
   229 
       
   230     .. _`Tag Cloud`: http://www.artweb-design.de/projects/mephisto-plugin-tag-cloud
       
   231     """
       
   232     if distribution == LINEAR or max_weight == 1:
       
   233         return weight
       
   234     elif distribution == LOGARITHMIC:
       
   235         return math.log(weight) * max_weight / math.log(max_weight)
       
   236     raise ValueError(_('Invalid distribution algorithm specified: %s.') % distribution)
       
   237 
       
   238 def calculate_cloud(tags, steps=4, distribution=LOGARITHMIC):
       
   239     """
       
   240     Add a ``font_size`` attribute to each tag according to the
       
   241     frequency of its use, as indicated by its ``count``
       
   242     attribute.
       
   243 
       
   244     ``steps`` defines the range of font sizes - ``font_size`` will
       
   245     be an integer between 1 and ``steps`` (inclusive).
       
   246 
       
   247     ``distribution`` defines the type of font size distribution
       
   248     algorithm which will be used - logarithmic or linear. It must be
       
   249     one of ``tagging.utils.LOGARITHMIC`` or ``tagging.utils.LINEAR``.
       
   250     """
       
   251     if len(tags) > 0:
       
   252         counts = [tag.count for tag in tags]
       
   253         min_weight = float(min(counts))
       
   254         max_weight = float(max(counts))
       
   255         thresholds = _calculate_thresholds(min_weight, max_weight, steps)
       
   256         for tag in tags:
       
   257             font_set = False
       
   258             tag_weight = _calculate_tag_weight(tag.count, max_weight, distribution)
       
   259             for i in range(steps):
       
   260                 if not font_set and tag_weight <= thresholds[i]:
       
   261                     tag.font_size = i + 1
       
   262                     font_set = True
       
   263     return tags