src/hdabo/wp_utils.py
author ymh <ymh.work@gmail.com>
Wed, 11 Apr 2018 12:19:47 +0200
branchdocumentation
changeset 693 09e00f38d177
parent 281 bc0f26b1acc2
permissions -rw-r--r--
Add hdabo/hdalab documentations
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
24
8f84b9bbd22d add wikipedia link edition
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
42
861a78f74a37 modify behavior for tag modification on the datasheet
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
     2
from django.conf import settings
72
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
     3
from django.utils.http import urlquote
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
     4
from haystack.constants import DJANGO_ID
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
     5
from haystack.query import SearchQuerySet
66
289ded098316 add revision link cf bug #12
ymh <ymh.work@gmail.com>
parents: 47
diff changeset
     6
from hdabo.models import Tag, TaggedSheet
42
861a78f74a37 modify behavior for tag modification on the datasheet
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
     7
from wikitools import api, wiki
72
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
     8
47
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 43
diff changeset
     9
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 43
diff changeset
    10
def normalize_tag(tag):
    """
    Normalize a tag label.

    Underscores are turned into spaces, leading/trailing whitespace is removed,
    runs of whitespace are collapsed to a single space and the first character
    is upper-cased.

    :param tag: the raw label (str or unicode).
    :return: the normalized label. Empty (falsy) input is returned unchanged,
        and a label made only of whitespace and/or underscores gives an empty
        string.
    """
    if not tag:
        return tag
    # split() without argument drops leading/trailing whitespace and collapses
    # inner runs, so no separate strip() is needed.
    tag = " ".join(tag.replace("_", " ").split())
    if not tag:
        # Nothing left after normalization (e.g. "   " or "_"): indexing
        # tag[0] below would raise IndexError.
        return tag
    return tag[0].upper() + tag[1:]
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 43
diff changeset
    18
66
289ded098316 add revision link cf bug #12
ymh <ymh.work@gmail.com>
parents: 47
diff changeset
    19
def urlize_for_wikipedia(label):
    """
    Return the label with every space replaced by an underscore, passed
    through Django's ``urlquote``.
    """
    underscored = "_".join(label.split(" "))
    return urlquote(underscored)
47
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 43
diff changeset
    21
24
8f84b9bbd22d add wikipedia link edition
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
8f84b9bbd22d add wikipedia link edition
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
def __is_homonymie(page_dict):
    """
    Tell whether a page dictionary carries a disambiguation category.

    Returns True when the "title" of any entry of ``page_dict["categories"]``
    contains the French or the English disambiguation category name,
    False otherwise (including when there is no "categories" entry).
    """
    markers = (u'Catégorie:Homonymie', u'Category:Disambiguation pages')
    return any(
        marker in category.get(u"title", u"")
        for category in page_dict.get(u"categories", [])
        for marker in markers
    )
8f84b9bbd22d add wikipedia link edition
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
111
ceb381f5b0c7 query wp for categories and infoboxes
ymh <ymh.work@gmail.com>
parents: 108
diff changeset
    29
66
289ded098316 add revision link cf bug #12
ymh <ymh.work@gmail.com>
parents: 47
diff changeset
    30
def query_wikipedia_title(site, label=None, pageid=None):
    """
    Query the wiki API for one page, by title or by page id, and summarize
    the answer.

    :param site: the wiki site object handed to ``api.APIRequest``.
    :param label: page title to look up; used when truthy.
    :param pageid: page id to look up; used only when ``label`` is falsy.
    :return: a dict with the keys ``new_label``, ``alternative_label``,
        ``status``, ``wikipedia_url``, ``pageid``,
        ``alternative_wikipedia_url``, ``alternative_pageid``,
        ``dbpedia_uri``, ``revision_id`` and ``response``.
        When the lookup does not yield exactly one valid page, ``status`` is
        ``Tag.TAG_URL_STATUS_DICT["null_result"]`` and every other value
        except ``response`` is None.
    """
    params = {
        'action': 'query',
        'prop': 'info|categories|langlinks',
        'inprop': 'url',
        'lllimit': '500',
        'cllimit': '500',
        'rvprop': 'ids',
    }
    if label:
        params['titles'] = label
    else:
        params['pageids'] = pageid

    def null_result(resp):
        # Result returned whenever no single usable page was found.
        return {
            'new_label': None,
            'alternative_label': None,
            'status': Tag.TAG_URL_STATUS_DICT["null_result"],
            'wikipedia_url': None,
            'pageid': None,
            'alternative_wikipedia_url': None,
            'alternative_pageid': None,
            'dbpedia_uri': None,
            'revision_id': None,
            'response': resp,
        }

    def single_page(resp):
        # Return the only page of the response, or None when the response
        # holds no page, several pages, or no 'query' entry at all.
        pages = resp.get('query', {}).get("pages", {})
        if len(pages) != 1:
            return None
        # next(iter(...)) works whether values() is a list or a view.
        return next(iter(pages.values()))

    original_response = api.APIRequest(site, params).query() #@UndefinedVariable

    page = single_page(original_response)
    if page is None or u"invalid" in page or u"missing" in page:
        return null_result(original_response)

    url = page.get(u'fullurl', None)
    pageid = page.get(u'pageid', None)
    new_label = page[u'title']
    alternative_label = None
    alternative_url = None
    alternative_pageid = None

    if __is_homonymie(page):
        status = Tag.TAG_URL_STATUS_DICT["homonyme"]
    elif u"redirect" in page:
        status = Tag.TAG_URL_STATUS_DICT["redirection"]
    else:
        status = Tag.TAG_URL_STATUS_DICT["match"]

    if status == Tag.TAG_URL_STATUS_DICT["redirection"]:
        # Ask again, this time letting the API follow the redirect, to get
        # the target page as the "alternative" page.
        params['redirects'] = True
        redirect_response = api.APIRequest(site, params).query() #@UndefinedVariable
        target = single_page(redirect_response)
        if target is None:
            # NOTE: in this case the returned 'response' is the redirect
            # response, not the original one.
            return null_result(redirect_response)
        # From here on `page` is the redirect target, so the revision id
        # read below is the target's.
        page = target
        alternative_label = page.get('title', None)
        alternative_url = page.get('fullurl', None)
        alternative_pageid = page.get('pageid', None)

    revision_id = page.get('lastrevid', None)

    # to be perfect we should sparql request DBPEDIA_URI_TEMPLATE, but we simply build the url
    dbpedia_uri = settings.DBPEDIA_URI_TEMPLATE % ("resource", urlize_for_wikipedia(new_label))

    return {
        'new_label': new_label,
        'alternative_label': alternative_label,
        'status': status,
        'wikipedia_url': url,
        'pageid': pageid,
        'alternative_wikipedia_url': alternative_url,
        'alternative_pageid': alternative_pageid,
        'dbpedia_uri': dbpedia_uri,
        'revision_id': revision_id,
        'response': original_response,
    }
47
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 43
diff changeset
    92
42
861a78f74a37 modify behavior for tag modification on the datasheet
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    93
861a78f74a37 modify behavior for tag modification on the datasheet
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    94
861a78f74a37 modify behavior for tag modification on the datasheet
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
    95
def get_or_create_tag(tag_label):
    """
    Find the Tag whose label matches ``tag_label`` (after normalization,
    case-insensitively) or create and save a new one filled from a
    Wikipedia lookup.

    Among the matching tags, the first one whose url_status is not
    "null_result" is preferred; otherwise the first match is used.

    :param tag_label: the raw label.
    :return: a ``(tag, wikipedia_revision_id, created)`` tuple.
        ``wikipedia_revision_id`` is None for an existing tag that has no
        wikipedia_pageid.
    """
    normalized_label = normalize_tag(tag_label)
    null_status = Tag.TAG_URL_STATUS_DICT['null_result']

    tag = None
    for candidate in Tag.objects.filter(label__iexact=normalized_label):
        if candidate.url_status != null_status:
            # A tag with a real Wikipedia status wins immediately.
            tag = candidate
            break
        if tag is None:
            # Keep the first null-result tag as a fallback.
            tag = candidate

    created = tag is None
    if created:
        tag = Tag(label=normalized_label, original_label=tag_label)

    site = wiki.Wiki(settings.WIKIPEDIA_API_URL) #@UndefinedVariable

    if created:
        wp_res = query_wikipedia_title(site, label=normalized_label)
        wikipedia_revision_id = wp_res['revision_id']

        # Label and status are only overwritten when the lookup provided them.
        if wp_res['new_label'] is not None:
            tag.label = wp_res['new_label']
        if wp_res['status'] is not None:
            tag.url_status = wp_res['status']
        tag.alternative_label = wp_res['alternative_label']
        tag.alternative_wikipedia_url = wp_res['alternative_wikipedia_url']
        tag.alternative_wikipedia_pageid = wp_res['alternative_pageid']
        tag.wikipedia_url = wp_res['wikipedia_url']
        tag.wikipedia_pageid = wp_res['pageid']
        tag.dbpedia_uri = wp_res["dbpedia_uri"]

        tag.save()
    elif tag.wikipedia_pageid:
        # Existing tag: only the current revision id is fetched.
        wikipedia_revision_id = query_wikipedia_title(site, pageid=tag.wikipedia_pageid)['revision_id']
    else:
        wikipedia_revision_id = None

    return tag, wikipedia_revision_id, created
42
861a78f74a37 modify behavior for tag modification on the datasheet
ymh <ymh.work@gmail.com>
parents: 25
diff changeset
   151
66
289ded098316 add revision link cf bug #12
ymh <ymh.work@gmail.com>
parents: 47
diff changeset
   152
def process_tag(site, tag, verbosity=0):
    """
    Look the tag's label up again on Wikipedia, store the result on the tag
    and save it.

    When the tag's wikipedia_pageid changed, the wikipedia_revision_id of
    every TaggedSheet using this tag is updated to the new revision id.

    :param site: the wiki site object passed to query_wikipedia_title.
    :param tag: the Tag to refresh.
    :param verbosity: when >= 2, the raw response is printed.
    """
    wp_res = query_wikipedia_title(site, label=tag.label)
    found_label = wp_res['new_label']
    found_status = wp_res['status']
    found_pageid = wp_res['pageid']

    if verbosity >= 2:
        print("response from query to %s with parameters %s :" % (site.apibase, repr(found_label)))
        print(repr(wp_res['response']))

    # Remember the page id before it is overwritten, to detect a change.
    former_pageid = tag.wikipedia_pageid

    # Label and status are only overwritten when the lookup provided them.
    if found_label is not None:
        tag.label = found_label
    if found_status is not None:
        tag.url_status = found_status
    tag.wikipedia_url = wp_res['wikipedia_url']
    tag.wikipedia_pageid = found_pageid
    tag.dbpedia_uri = wp_res["dbpedia_uri"]
    tag.alternative_label = wp_res['alternative_label']
    tag.alternative_wikipedia_url = wp_res['alternative_wikipedia_url']
    tag.alternative_wikipedia_pageid = wp_res['alternative_pageid']

    tag.save()

    if former_pageid != found_pageid:
        TaggedSheet.objects.filter(tag=tag).update(wikipedia_revision_id=wp_res["revision_id"])
72
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
   187
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
   188
ba8ebabbaece -correct css and display
ymh <ymh.work@gmail.com>
parents: 69
diff changeset
   189
def reorder_datasheet_tags(ds):
    """
    Recompute the order of the tags attached to a Datasheet.

    Each tagged sheet gets an index_note: the score of the first search
    result for its tag label in the title or description, restricted to this
    Datasheet, or 0 when there is no result. Tagged sheets are then
    renumbered from 1, highest index_note first, ties keeping their previous
    relative order. The Datasheet's manual_order flag is cleared.
    """
    # Restricts every search to the document of this Datasheet.
    pk_filter = {DJANGO_ID + "__exact": unicode(ds.pk)}

    tagged_sheets = []
    for tagged in ds.taggedsheet_set.all():
        tagged.index_note = 0
        label = tagged.tag.label
        hits = SearchQuerySet().filter(title=label).filter_or(description=label).filter(**pk_filter)
        if len(hits) > 0:
            tagged.index_note += hits[0].score
            tagged.save()
        tagged_sheets.append(tagged)

    ranked = sorted(tagged_sheets, key=lambda t: (-t.index_note, t.order))
    for position, tagged in enumerate(ranked, 1):
        tagged.order = position
        tagged.save()

    if ds.manual_order:
        ds.manual_order = False
        ds.save()
47
08b008c5a07d - add popularity
ymh <ymh.work@gmail.com>
parents: 43
diff changeset
   211
693
09e00f38d177 Add hdabo/hdalab documentations
ymh <ymh.work@gmail.com>
parents: 281
diff changeset
   212