# -*- coding: utf-8 -*-
from django.conf import settings
from hdabo.models import Tag
from wikitools import api, wiki
from django.utils.http import urlquote

def normalize_tag(tag):
    """Return a canonical form of the tag label *tag*.

    Underscores are turned into spaces, leading/trailing whitespace is
    removed, internal runs of whitespace are collapsed to a single space and
    the first character is upper-cased (the rest is left untouched).

    An input that is empty, or that contains only whitespace and/or
    underscores, yields the empty string instead of raising ``IndexError``.
    """
    # Normalize first: the emptiness test must look at the cleaned-up value,
    # otherwise inputs such as "   " or "___" reach ``tag[0]`` while empty.
    tag = " ".join(tag.replace("_", " ").split())
    if not tag:
        return tag
    return tag[0].upper() + tag[1:]

def urlize_for_wkipedia(label):
    """Return *label* with spaces replaced by underscores, then passed through ``urlquote``."""
    underscored = label.replace(" ", "_")
    return urlquote(underscored)


def __is_homonymie(page_dict):
    """Tell whether *page_dict* is listed in a disambiguation category.

    *page_dict* is a mapping that may hold a ``categories`` list of mappings,
    each with an optional ``title``.  The result is ``True`` as soon as one
    title contains the French or the English disambiguation category name,
    ``False`` otherwise (including when there are no categories at all).
    """
    markers = (u'Catégorie:Homonymie', u'Category:Disambiguation pages')
    return any(
        marker in category.get(u"title", u"")
        for category in page_dict.get(u"categories", [])
        for marker in markers
    )

def query_wikipedia_title(site, label):
    """Query the wiki API behind *site* for the page titled *label*.

    Returns a dict with the keys ``new_label``, ``status``,
    ``wikipedia_url``, ``pageid``, ``dbpedia_uri`` and ``response``.
    ``status`` is one of the ``Tag.TAG_URL_STATUS_DICT`` values stored under
    ``null_result``, ``homonyme``, ``redirection`` or ``match``.

    A null result (zero or several pages returned, or a page flagged
    ``invalid``/``missing``) carries ``None`` in every field except
    ``status`` and ``response``.

    When the page is a redirect, a second request is made with
    ``redirects`` enabled; the page it returns is only used to look up the
    English language link.  ``new_label``, ``wikipedia_url`` and ``pageid``
    still come from the first request, and so does ``response`` (unless the
    second request itself produces a null result).
    """

    def build_params(**extra):
        # Build a brand new dict for each request.
        request_params = {'action':'query', 'titles': label, 'prop':'info|categories|langlinks', 'inprop':'url', 'lllimit':'500', 'cllimit':'500'}
        request_params.update(extra)
        return request_params

    def null_result(api_response):
        # Common shape of every "nothing usable found" answer.
        return {
            'new_label': None,
            'status': Tag.TAG_URL_STATUS_DICT["null_result"],
            'wikipedia_url': None,
            'pageid': None,
            'dbpedia_uri': None,
            'response': api_response,
        }

    first_response = api.APIRequest(site, build_params()).query() #@UndefinedVariable

    # Exactly one page is expected; anything else is a null result.
    found_pages = first_response['query'].get("pages", {})
    if len(found_pages) != 1:
        return null_result(first_response)

    page = list(found_pages.values())[0]
    if u"invalid" in page or u"missing" in page:
        return null_result(first_response)

    # These three values always describe the page of the first request.
    new_label = page[u'title']
    url = page.get(u'fullurl', None)
    pageid = page.get(u'pageid', None)

    statuses = Tag.TAG_URL_STATUS_DICT
    if __is_homonymie(page):
        status = statuses["homonyme"]
    elif u"redirect" in page:
        status = statuses["redirection"]
    else:
        status = statuses["match"]

    if status == statuses["redirection"]:
        # Ask again, this time letting the API follow the redirect.
        redirect_response = api.APIRequest(site, build_params(redirects=True)).query() #@UndefinedVariable
        resolved_pages = redirect_response['query'].get("pages", {})
        if len(resolved_pages) != 1:
            return null_result(redirect_response)
        page = list(resolved_pages.values())[0]

    # Extract the English label from the language links, if there is one.
    english_label = None
    if status in (statuses['match'], statuses['redirection']):
        english_label = next(
            (link["*"] for link in page.get('langlinks', []) if link['lang'] == "en"),
            None,
        )

    # No DBpedia URI is built for a label containing "#".
    dbpedia_uri = None
    if english_label and "#" not in english_label:
        dbpedia_uri = settings.DBPEDIA_URI_TEMPLATE % (urlize_for_wkipedia(english_label))

    return {
        'new_label': new_label,
        'status': status,
        'wikipedia_url': url,
        'pageid': pageid,
        'dbpedia_uri': dbpedia_uri,
        'response': first_response,
    }



def get_or_create_tag(tag_label):
    """Fetch the ``Tag`` matching *tag_label*, creating it when absent.

    The label is normalized with ``normalize_tag`` and looked up
    case-insensitively.  A newly created tag is additionally filled in from
    ``query_wikipedia_title`` (label, url status, wikipedia url, page id and
    dbpedia uri) and saved.  An existing tag is returned untouched.

    Returns the ``(tag, created)`` pair given by ``get_or_create``.
    """
    normalized_label = normalize_tag(tag_label)
    creation_defaults = {'label':normalized_label, 'original_label':tag_label}
    tag, created = Tag.objects.get_or_create(label__iexact=normalized_label, defaults=creation_defaults)

    # Only freshly created tags need their wikipedia references resolved.
    if not created:
        return tag, created

    site = wiki.Wiki(settings.WIKIPEDIA_API_URL) #@UndefinedVariable
    wp_res = query_wikipedia_title(site, normalized_label)

    # Label and status are kept as they are when the lookup gives None.
    if wp_res['new_label'] is not None:
        tag.label = wp_res['new_label']
    if wp_res['status'] is not None:
        tag.url_status = wp_res['status']
    # The remaining fields are always overwritten, even with None.
    tag.wikipedia_url = wp_res['wikipedia_url']
    tag.wikipedia_pageid = wp_res['pageid']
    tag.dbpedia_uri = wp_res["dbpedia_uri"]

    tag.save()

    return tag, created

def process_tag(site, tag, verbosity):
    """Refresh *tag* from a wikipedia lookup of ``tag.label`` and save it.

    *site* is passed to ``query_wikipedia_title``.  When *verbosity* is 2 or
    more, the raw API response is printed.  The tag's label and url status
    are replaced only when the lookup yields a non-``None`` value; the
    wikipedia url, page id and dbpedia uri are always overwritten.
    """
    wp_res = query_wikipedia_title(site, tag.label)
    new_label = wp_res['new_label']
    status = wp_res['status']

    if verbosity >= 2:
        # NOTE(review): the message says "parameters" but what is printed is
        # the label returned by the lookup, not the label queried - confirm.
        print("response from query to %s with parameters %s :" % (site.apibase, repr(new_label)))
        print(repr(wp_res['response']))

    if new_label is not None:
        tag.label = new_label
    if status is not None:
        tag.url_status = status
    tag.wikipedia_url = wp_res['wikipedia_url']
    tag.wikipedia_pageid = wp_res['pageid']
    tag.dbpedia_uri = wp_res["dbpedia_uri"]

    tag.save()


    
