update model and fixture
- wikipedia_activated
- Categories
Add check for existing tag on datasheet
# -*- coding: utf-8 -*-
from django.conf import settings
from hdabo.models import Tag
from wikitools import api, wiki
def __is_homonymie(page_dict):
    """Return True when a page entry belongs to a disambiguation category.

    Each item of the optional ``categories`` list of ``page_dict`` is
    inspected; the page is considered a disambiguation page as soon as one
    item's ``title`` contains the French or the English category marker.
    A missing ``categories`` list or a missing ``title`` counts as no match.
    """
    markers = (u'Catégorie:Homonymie', u'Category:Disambiguation pages')
    titles = (entry.get(u"title", u"") for entry in page_dict.get(u"categories", []))
    return any(marker in title for title in titles for marker in markers)
def query_wikipedia_title(site, label):
    """Query the wiki API for the page titled ``label`` and classify the result.

    Args:
        site: site object passed as first argument to ``api.APIRequest``.
        label: page title sent as the ``titles`` parameter of the query.

    Returns:
        A 5-tuple ``(new_label, status, url, pageid, response)``.

        * When the response holds zero or several pages, or when the single
          page is flagged ``invalid`` or ``missing``, the tuple is
          ``(None, <null_result status>, None, None, response)``.
        * Otherwise ``new_label`` is the page ``title``, ``url`` its
          ``fullurl`` (or None), ``pageid`` its ``pageid`` (or None) and
          ``status`` is the ``homonyme``, ``redirection`` or ``match`` entry
          of ``Tag.TAG_URL_STATUS_DICT``.

        ``response`` is always the raw result of the API query.
    """
    params = {'action': 'query', 'titles': label, 'prop': 'info|categories', 'inprop': 'url'}
    wpquery = api.APIRequest(site, params)  #@UndefinedVariable
    response = wpquery.query()

    # Tolerate a response without a "query" section: it is handled like a
    # response with no page instead of raising KeyError.
    query_dict = response.get('query', {})

    # Exactly one page is expected; multiple pages or none -> null result.
    pages = query_dict.get("pages", {})
    if len(pages) != 1:
        return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None, response

    # next(iter(...)) works whether values() returns a list or a dict view,
    # unlike indexing with [0].
    page = next(iter(pages.values()))

    if u"invalid" in page or u"missing" in page:
        return None, Tag.TAG_URL_STATUS_DICT["null_result"], None, None, response

    url = page.get(u'fullurl', None)
    pageid = page.get(u'pageid', None)
    new_label = page[u'title']

    # Disambiguation takes precedence over redirection when both apply.
    if __is_homonymie(page):
        status = Tag.TAG_URL_STATUS_DICT["homonyme"]
    elif u"redirect" in page:
        status = Tag.TAG_URL_STATUS_DICT["redirection"]
    else:
        status = Tag.TAG_URL_STATUS_DICT["match"]

    return new_label, status, url, pageid, response
def normalize_tag(tag):
    """Return a normalized form of a tag label.

    Underscores are turned into spaces, leading/trailing whitespace is
    removed, inner whitespace runs are collapsed to a single space and the
    first character is upper-cased (the rest of the label is left as is).

    An empty label is returned unchanged. A label made only of whitespace
    and/or underscores normalizes to the empty string instead of raising
    ``IndexError`` on the first-character access.
    """
    if not tag:
        return tag

    # split() with no argument drops leading/trailing whitespace and
    # collapses inner runs, so a separate strip() is not needed.
    tag = " ".join(tag.replace("_", " ").split())

    # Nothing left after normalization: there is no first character to
    # capitalize.
    if not tag:
        return tag

    return tag[0].upper() + tag[1:]
def get_or_create_tag(tag_label):
    """Return the ``Tag`` matching ``tag_label``, creating it when needed.

    The label is normalized with ``normalize_tag`` and looked up
    case-insensitively. When no tag exists, one is created with the
    normalized label (the raw label is kept as ``original_label``), then
    completed with the result of ``query_wikipedia_title`` and saved.
    An already existing tag is returned untouched.

    Returns:
        A ``(tag, created)`` pair, ``created`` being True when the tag was
        created by this call.
    """
    normalized_label = normalize_tag(tag_label)

    # Fetch the tag object, or create it from the normalized label.
    creation_defaults = {'label': normalized_label, 'original_label': tag_label}
    tag, created = Tag.objects.get_or_create(label__iexact=normalized_label, defaults=creation_defaults)
    if not created:
        return tag, created

    # New tag: fetch its wikipedia references.
    site = wiki.Wiki(settings.WIKIPEDIA_API_URL)  #@UndefinedVariable
    wp_label, wp_status, wp_url, wp_pageid, _response = query_wikipedia_title(site, normalized_label)  #@UnusedVariable

    # Store the data; label and status are only overwritten when provided,
    # while url and page id are always set (None when absent).
    if wp_label is not None:
        tag.label = wp_label
    if wp_status is not None:
        tag.url_status = wp_status
    tag.wikipedia_url = wp_url
    tag.wikipedia_pageid = wp_pageid
    tag.save()

    return tag, created