src/hdalab/views/ajax.py
author ymh <ymh.work@gmail.com>
Wed, 18 Mar 2015 18:52:43 +0100
changeset 545 c752fdee555b
parent 445 a74ec9e02042
child 603 979d9263c1c6
permissions -rw-r--r--
Migration to django 1.7

# -*- coding: utf-8 -*-
'''
Created on Jan 31, 2012

@author: ymh
'''
from django.conf import settings
from django.core.cache import cache
from django.db.models import Q, Count, Min
from django.http import HttpResponse
from hdabo.models import Tag, Datasheet, TaggedSheet
from hdalab.models import HdaSession, Country, TagYears, DatasheetExtras
from hdalab.models.dataviz import DbpediaFieldsTranslation, DbpediaFields
from hdalab.models.categories import WpCategory
from hdalab.utils import fix_cache_key
import copy
import json
import hmac
import itertools
import uuid

import logging
logger = logging.getLogger(__name__)

def tagtranslation(request):
    '''
    Return a JSON object mapping tag labels to their translated labels.

    GET parameters:
      - labels: comma separated list of tag labels (matched case-insensitively).
      - lang: language code of the wanted translations, defaults to
        request.LANGUAGE_CODE.

    An empty JSON object is returned when no label is given.
    '''
    lang = request.GET.get('lang',request.LANGUAGE_CODE)
    labels = request.GET.get('labels',None)

    if not labels:
        return HttpResponse(content=json.dumps({}), content_type='application/json')

    labelslist = [lbl.strip() for lbl in labels.split(",")]
    masters = []

    for lbl in labelslist:
        # Only tags having a dbpedia uri are considered, at most one per label.
        labelqs = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = lbl)[0:1]
        # Iterating evaluates the sliced queryset exactly once (zero or one
        # row); len() followed by get() used to hit the database twice.
        for tag in labelqs:
            if tag.dbpedia_fields:
                masters.append(tag.dbpedia_fields)

    translationqs = DbpediaFieldsTranslation.objects.select_related("master", "master__tag").filter(master__in = masters, language_code=lang)

    translations = dict([(t.master.label, t.label) for t in translationqs])

    return HttpResponse(content=json.dumps(translations), content_type='application/json')

def subcat(category, globtags, level, max_level ):
    '''
    Recursive helper used by cattree.

    Builds the tree node ({'label', 'themes', 'contents'}) of a category and
    registers in globtags, for every tag label met, its level and the node
    ('access') where contents tagged with it must be stored.
    Sub categories are explored while level is lower than max_level.
    '''
    node = {
          'label': category.label,
          'themes': [],
          'contents': []
      }
    own_label = category.label
    tagqs = Tag.objects.filter(wp_categories__wp_category = category).distinct()
    # a dict is used to keep each label only once
    unique_labels = dict([(tag.label, tag.label) for tag in tagqs])

    for tag_label in unique_labels:
        if tag_label != own_label:
            # tag of the category: it gets its own leaf, one level deeper
            leaf = {'label' : tag_label, 'contents': []}
            node['themes'].append(leaf)
            globtags[tag_label] = {'level': (level + 1), 'access': leaf }
        else:
            # the tag bearing the category name points to the category node
            globtags[tag_label] = {'level': level, 'access': node }

    if level >= max_level:
        return node

    children = WpCategory.objects.filter(parent_categories__parent_category = category)
    for child in children:
        node['themes'].append(subcat(child, globtags, level + 1, max_level ))
    return node

def cleantags(category):
    '''
    Recursively prune and sort a category tree, in place.

    - an empty 'contents' list is removed, a non empty one is sorted by
      decreasing 'score';
    - 'themes' are cleaned recursively, the ones left with neither 'themes'
      nor 'contents' are dropped, the others are sorted by 'label';
    - an empty 'themes' list is removed.

    The (modified) category dict is returned.
    '''
    # "in" replaces dict.has_key, which no longer exists in Python 3.
    if 'contents' in category:
        if len(category['contents']) == 0:
            del category['contents']
        else:
            category['contents'] = sorted(category['contents'], key=lambda content: -content['score'])
    if 'themes' in category:
        themes = []
        for theme in category['themes']:
            clean_theme = cleantags(theme)
            if 'themes' in clean_theme or 'contents' in clean_theme:
                themes.append(clean_theme)
        if themes:
            category['themes'] = sorted(themes, key=lambda cat: cat['label'])
        else:
            del category['themes']
    return category

def cattree(request):
    '''
    Return, as JSON, the category tree rooted at the category named by the
    "label" GET parameter, filled with the validated datasheets tagged with
    that label.

    Each datasheet is attached to the node of its best scoring tag
    (LEVEL_COEFF * tag level - tag order). An empty JSON object is returned
    when the label is missing or when no category matches it.
    '''
    ROOT_MAX_TAG_ORDER = 8
    MAX_TAG_ORDER = 8
    MAX_LEVEL = 3
    LEVEL_COEFF = 5

    root_label = request.GET.get('label', None)
    if not root_label:
        # no label: nothing to build (label.lower() used to raise here)
        return HttpResponse(content=json.dumps({}), content_type='application/json')
    lowerlabel = root_label.lower()

    globtags = {}
    master_category = list(WpCategory.objects.filter(label__iexact=root_label)[0:1])
    if not master_category:
        # unknown category: without a tree no datasheet can be attached, and
        # cleantags(None) used to raise
        return HttpResponse(content=json.dumps({}), content_type='application/json')
    resobj = subcat(master_category[0], globtags, 1, MAX_LEVEL )

    #datasheets = Datasheet.objects.filter(validated = True, taggedsheet__tag__label__in = tag_list, taggedsheet__order__lte = MAX_TAG_ORDER).distinct()
    datasheets = Datasheet.objects.filter(validated = True, taggedsheet__tag__label__iexact = root_label, taggedsheet__order__lte = ROOT_MAX_TAG_ORDER).select_related('organisation').distinct()
    for datasheet in datasheets:
        # Calculating where we add the datasheet in the tree
        maintag = None
        maintagscore = -5
        dsscore = 0
        rootscore = 0
        for ts in TaggedSheet.objects.select_related('tag','datasheet').filter(datasheet__id=datasheet.id,order__lte=MAX_TAG_ORDER):
            tag_label = ts.tag.label
            if tag_label in globtags:
                # deeper tags and better ranked tags win
                score = LEVEL_COEFF * globtags[tag_label]['level'] - ts.order
                if score > maintagscore:
                    maintagscore = score
                    maintag = tag_label
                    dsscore = (MAX_TAG_ORDER - ts.order)
                if tag_label.lower() == lowerlabel:
                    rootscore = (ROOT_MAX_TAG_ORDER - ts.order)
        if maintag is not None:
            globtags[maintag]['access']['contents'].append({
                'id': datasheet.id,
                'title': datasheet.title,
                'url': datasheet.url,
                'description': datasheet.description,
                'hda_id': datasheet.hda_id,
                'organization': datasheet.organisation.name,
                'organization_url': datasheet.organisation.website,
                'score': max(dsscore, rootscore)
            })
    cleantags(resobj)

    return HttpResponse(content=json.dumps(resobj), content_type='application/json')

def sessioninfo(request):
    '''
    Read or update the data of an HdaSession and return it as JSON.

    GET parameters:
      - data: JSON encoded data to store (defaults to "{}").
      - sessionid, sessionkey: when given they replace the values kept in the
        Django session.

    Writing is allowed only when the session key equals the HMAC of the
    session id computed with settings.SECRET_KEY. When the Django session has
    no (valid) session id, a new HdaSession is created and writing is allowed.

    The response contains 'data', 'write_allowed', 'sessionid' and, when
    writing is allowed, 'sessionkey'.
    '''
    # local import: only this view needs it
    from django.utils.crypto import constant_time_compare

    def _sign(sid):
        # key proving that a client is allowed to write into session sid
        return hmac.new(settings.SECRET_KEY, sid).hexdigest()

    data = json.loads(request.GET.get('data', "{}"))
    write = False

    if 'sessionid' in request.GET:
        request.session['sessionid'] = request.GET['sessionid']
    if 'sessionkey' in request.GET:
        request.session['sessionkey'] = request.GET['sessionkey']

    if 'sessionid' in request.session:
        sessionid = request.session['sessionid']

        if HdaSession.objects.filter(sessionid=sessionid).count() == 1:
            sessionkey = request.session.get('sessionkey',None)
            # the key comes from the client: compare in constant time so that
            # the comparison duration does not leak the expected value
            if sessionkey is not None and constant_time_compare(_sign(sessionid), sessionkey):
                write = True
        else:
            # unknown session id: forget it, a new session is created below
            del request.session['sessionid']

    if 'sessionid' not in request.session:
        sessionid = unicode(uuid.uuid1())
        HdaSession.objects.create(sessionid=sessionid, data=json.dumps({}))
        write = True
        request.session['sessionid'] = sessionid
        request.session['sessionkey'] = _sign(sessionid)

    if write and data:
        HdaSession.objects.filter(sessionid=sessionid).update(data=json.dumps(data))
    else:
        # read only access, or nothing to write: send back the stored data
        data = HdaSession.objects.get(sessionid=sessionid).data
        data = json.loads(data) if data else {}

    resobj = {'data': data, "write_allowed" : write, "sessionid": sessionid }
    if write:
        resobj['sessionkey'] = request.session['sessionkey']

    return HttpResponse(content=json.dumps(resobj), content_type='application/json')


def tagsearch(request):
    '''
    Search tags (used for autocompletion) and return them as a JSON list.

    Parameters:
      - term (GET): searched text. When missing or empty, every tag having a
        dbpedia uri is a candidate.
      - count (GET): maximum number of tags fetched, 40 by default.
      - lang (GET): language code, defaults to request.LANGUAGE_CODE.
      - count_notices (POST or GET): 'true'/'1' or 'false'/'0'. When true (the
        default) tags are sorted by decreasing number of datasheets and this
        number is returned as 'nb'. Any other value disables the count.

    Each result has the keys 'original_label', 'url', 'thumbnail', 'value',
    'abstract' and optionally 'nb'.
    '''
    q = request.GET.get('term',None)
    maxcount = int(request.GET.get('count','40'))
    lang = request.GET.get('lang',request.LANGUAGE_CODE)
    # request.REQUEST is deprecated since Django 1.7; POST is looked up before
    # GET, as request.REQUEST did.
    count_notices_str = request.POST.get("count_notices", request.GET.get("count_notices"))
    count_notices_bool = True
    if count_notices_str:
        count_notices_bool = {'true': True, 'false': False, "0": False, "1": True}.get(count_notices_str.lower())

    # For Japanese, there are no word boundaries, we should not use the regexp in that case
    stemming_langs = [ 'fr', 'en', 'de', 'it' ]
    # languages for which the tag label itself is used (no translation lookup)
    no_translate_langs = [ 'fr' ]

    # lower-cased term, defined in every case so that the final check on the
    # results cannot hit an unbound name when the term is empty
    lq = q.lower() if q else None

    if q:
        qs = Tag.objects.select_related('dbpedia_fields').filter(datasheet__validated=True)
        # NOTE(review): the term is inserted as is in the regular expression,
        # regexp metacharacters typed by the user are not escaped - confirm
        # this is acceptable for the database backend in use.
        qrx = '(\\m|\\b)%s'%q
        if lang in no_translate_langs:
            if lang in stemming_langs:
                qs = qs.filter( label__iregex = qrx )
            else:
                qs = qs.filter( label__icontains = q )
        else:
            if lang in stemming_langs:
                qs = qs.filter(dbpedia_fields__translations__label__iregex=qrx, dbpedia_fields__translations__language_code=lang, dbpedia_fields__translations__is_label_translated = True)
            else:
                qs = qs.filter(dbpedia_fields__translations__label__icontains=q, dbpedia_fields__translations__language_code=lang, dbpedia_fields__translations__is_label_translated = True)
    else:
        qs = Tag.objects.filter(~Q(dbpedia_uri = None))

    if count_notices_bool:
        qs = qs.annotate(nb=Count('datasheet',distinct=True)).order_by('-nb')[:maxcount]
    else:
        qs = qs.distinct()[:maxcount]

    qslist = list(qs)

    if lang in no_translate_langs:
        translations = {}
    else:
        transqs = DbpediaFieldsTranslation.objects.filter(master__tag__in = qslist, language_code=lang, is_label_translated=True).select_related("master")
        translations = dict([(tr.master.tag_id, {'label':tr.label,'abstract':tr.abstract, 'is_label_translated': tr.is_label_translated}) for tr in transqs])

    res = []

    for t in qslist:
        if hasattr(t, 'dbpedia_fields'):
            dbfields = t.dbpedia_fields
            resobj = {'original_label':t.label, 'url':t.wikipedia_url}
            if count_notices_bool:
                resobj['nb'] = t.nb
            resobj['thumbnail'] = dbfields.thumbnail if dbfields is not None else None

            if t.id in translations:
                resobj['value'] = translations[t.id]['label']
                resobj['abstract'] = translations[t.id]['abstract']
            else:
                resobj['value'] = t.label
                resobj['abstract'] = dbfields.abstract if dbfields is not None else None
            # keep only the results whose displayed value contains the term
            if not q or resobj['value'].lower().find(lq) != -1:
                res.append(resobj)

    return HttpResponse(content=json.dumps(res), content_type='application/json')

def catsearch(request):
    '''
    Search the categories whose label starts a word with the "term" GET
    parameter and return them as a JSON list of {'value': label}.

    A missing term is handled like an empty one (it used to be formatted as
    the literal text "None" in the regular expression).
    '''
    q = request.GET.get('term',None) or ''

    # Only the categories that are also tags are retrieved
    qrx = '(\\m|\\b)%s'%q
    qs = Tag.objects.filter(label__iregex=qrx)

    labels = [tag.label for tag in qs]

    # nb__gt=0 keeps the categories having at least one tagged child category
    qs = WpCategory.objects.annotate(nb=Count('child_categories__child_category__tags')).filter(label__in = labels, nb__gt=0)

    res = [{'value':t.label} for t in qs]

    return HttpResponse(content=json.dumps(res), content_type='application/json')

def filter(request):
    '''
    View returning the JSON built by filter_generic from the GET parameters.

    GET parameters: lang (defaults to request.LANGUAGE_CODE), period, label,
    country, contentlist, mto (max tag order, 12 by default), contentcount
    (8 by default) and tagcount (30 by default).
    '''
    lang = request.GET.get('lang',request.LANGUAGE_CODE)
    periode = request.GET.get('period',None)
    label = request.GET.get('label', None)
    country = request.GET.get('country', None)
    contentlist = request.GET.get('contentlist', None)
    max_tag_order = int(request.GET.get('mto', '12'))
    # the counts are used as slice bounds by filter_generic: convert them like
    # mto instead of forwarding the raw query string values
    content_count = int(request.GET.get('contentcount', 8))
    tag_count = int(request.GET.get('tagcount', 30))

    outputstr = filter_generic(lang, periode, label, country, contentlist, max_tag_order, content_count, tag_count)

    return HttpResponse(content=outputstr, content_type='application/json')


def filter_generic(lang="fr-fr", periode=None, label=None, country=None, contentlist=None, max_tag_order=12, content_count=8, tag_count=30):
    '''
    Build the JSON string describing the datasheets matching the given
    criteria, together with the related tags, countries, disciplines and years.
    The result is kept in the cache, the key being built from all arguments.

    Arguments:
      - lang: language code used for the translated labels.
      - periode: "start" or "start,end" years.
      - label: comma separated tag labels (all of them must match).
      - country: comma separated country dbpedia uris.
      - contentlist: comma separated datasheet ids.
      - max_tag_order: maximum order of the tags taken into account.
      - content_count: maximum number of datasheets returned.
      - tag_count: maximum number of tags returned.

    The JSON object has the keys 'count', 'contents', 'tags', 'sparkline',
    'countries', 'disciplines' and 'tagtranslations'.
    '''
    no_translate_langs = [ 'fr' ]

    key_parts = ("filter",lang,periode,label,country,contentlist,max_tag_order,content_count,tag_count)
    key_parts = [unicode(p).encode("ascii", "ignore") for p in key_parts]

    cache_key = fix_cache_key("-".join(key_parts))

    outputstr = cache.get(cache_key)

    if outputstr is None:

        matchtagids = []

        tagqs = Tag.objects.exclude(category__label__in = ['Datation', 'Localisation', 'Discipline artistique']).filter(~Q(dbpedia_uri = None))
        countryqs = Country.objects
        discqs = Tag.objects.filter(~Q(dbpedia_uri = None), category__label = u'Discipline artistique').select_related('dbpedia_fields')
        yearqs = TagYears.objects

        contentqs = Datasheet.objects.filter(validated=True)
        labeltranslations = []

        if label or periode or country or contentlist :
            # one queryset of matching tags per criterion
            matchtagqslist = []

            if periode:
                years = periode.split(",")
                start_year = int(years[0])
                end_year = int(years[0:2][-1])
                # tags may overflow the period by half its length (at least 1 year)
                delta = max(1, (end_year-start_year)//2)
                minstart = start_year - delta
                maxend = end_year + delta
                matchtagqs = Tag.objects.filter(~Q(dbpedia_uri = None),
                                                years__end_year__gte = start_year,
                                                years__start_year__lte = end_year,
                                                years__end_year__lte = maxend,
                                                years__start_year__gte = minstart,
                                                )
                matchtagqslist.append(matchtagqs)

            if label:
                for txtlbl in label.split(","):
                    matchtagqs = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = txtlbl.strip())
                    matchtagqslist.append(matchtagqs)

            if country:
                for country_uri in country.split(","):
                    # the ids of the matching tags are computed once, below,
                    # for all criteria (they used to be fetched here as well,
                    # only to be overwritten)
                    matchtagqs = Tag.objects.filter(~Q(dbpedia_uri = None),locatedin__country__dbpedia_uri = country_uri)
                    matchtagqslist.append(matchtagqs)
            if contentlist:
                contentqs = contentqs.filter(id__in = contentlist.split(","))

            # tagcond: a datasheet must match every criterion
            # tagcondid: a tag is a "matching tag" when it matches any criterion
            tagcond = None
            tagcondid = None
            for matchtagqs in matchtagqslist:
                newcond = Q(id__in = TaggedSheet.objects.filter(tag__in = copy.deepcopy(matchtagqs), order__lte = max_tag_order).values('datasheet_id'))
                newcondid = Q(id__in = matchtagqs)
                tagcond = newcond if tagcond is None else (tagcond & newcond)
                tagcondid = newcondid if tagcondid is None else (tagcondid | newcondid)

            if tagcond is not None:
                contentqs = contentqs.filter(tagcond).distinct()
                matchtagidsqs = list(Tag.objects.select_related("dbpedia_fields").only("id").filter(tagcondid))
            else:
                # only contentlist was given: there is no tag criterion, and
                # filter(None) is not a valid call
                matchtagidsqs = []
            matchtagids = [t.id for t in matchtagidsqs]

            if lang not in no_translate_langs:
                masters = [t.dbpedia_fields for t in matchtagidsqs if t.dbpedia_fields is not None]

                translationqs = DbpediaFieldsTranslation.objects.select_related("master", "master__tag").filter(master__in = masters, language_code=lang)
                labeltranslations = [{'label':t.master.label, 'translated_label':t.label} for t in translationqs]

            tagqs = tagqs.filter(datasheet__in = contentqs)
            countryqs = countryqs.filter(includes__tag__taggedsheet__datasheet__in = contentqs)
            discqs = discqs.filter(datasheet__in = contentqs)
            yearqs = yearqs.filter(tag__taggedsheet__datasheet__in = contentqs)

        # NOTE(review): a "contentqs.order_by('?')" statement stood here when
        # contentlist is None, but its result was discarded, so it never had
        # any effect and was removed. Before enabling a random order, check
        # how it combines with the distinct() above on the database in use.

        cont_count = contentqs.count()

        logger.debug("ajax filter SQL for contentqs %s", contentqs.query)

        contenus = dict([(content.id, {'score' : 0, 'tags' : [], 'hda_id': content.hda_id, 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}) for content in contentqs[0:content_count]])
        contentids = list(contenus.keys())

        qs = DatasheetExtras.objects.select_related('insee').filter(datasheet__in = contentids)
        for dse in qs:
            contenus[dse.datasheet_id]['coords'] = {'city_name': dse.insee.city_name, 'latitude': dse.insee.latitude, 'longitude': dse.insee.longitude}

        qs = list(TaggedSheet.objects.select_related('tag', 'tag__dbpedia_fields').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order'))

        translations = {}

        if lang not in no_translate_langs:
            ts_list = []
            for ts in qs:
                if hasattr(ts, 'tag') and hasattr(ts.tag, 'dbpedia_fields') :
                    ts_list.append(ts.tag.dbpedia_fields)
            transqs = DbpediaFieldsTranslation.objects.filter(master__in = ts_list, language_code = lang)
            translations = dict([(trans.master_id,trans.label) for trans in transqs])

        for ts in qs:
            if hasattr(ts, 'tag') and hasattr(ts.tag, 'dbpedia_fields') :
                match_tag = ts.tag.id in matchtagids
                contenus[ts.datasheet_id]['tags'].append({'id': ts.tag.id,
                                                          'label': ts.tag.label,
                                                          'order': ts.order,
                                                          'match': match_tag,
                                                          'translated_label': translations.get(ts.tag.dbpedia_fields.id, ts.tag.label) if ts.tag.dbpedia_fields is not None else ts.tag.label,
                                                          'url': ts.tag.dbpedia_uri,
                                                          'wkpd_url': ts.tag.wikipedia_url})

                # a matching tag weighs more when it is well ranked
                if match_tag:
                    contenus[ts.datasheet_id]['score'] += 2*max_tag_order - ts.order

        if contentlist is None:
            contenus = sorted(contenus.values(),key=lambda e: -e['score'])
        else:
            contenus = list(contenus.values())

        #tagqs = tagqs.annotate(nb=Count('datasheet')).order_by('-nb')[:tag_count]
        tagqs = tagqs.annotate(nb=Count('datasheet')).order_by('-nb').only('id','label')[:tag_count]
        #.select_related('dbpedia_fields')
        # hack to add only necessary fields in the group by
        # workaround for bug https://code.djangoproject.com/ticket/17144
        tagqs.query.clear_select_fields()
        tagqs.query.add_fields(['id','label'], False)
        tagqs.query.set_group_by()

        tagqslist = list(tagqs)

        dbpediafields = dict([(df.tag_id, df) for df in DbpediaFields.objects.filter(tag__in = tagqslist)])

        if lang not in no_translate_langs:
            transqs = DbpediaFieldsTranslation.objects.filter(master__in = dbpediafields.values(), language_code = lang)
            translations = dict([(trans.master_id,trans.label) for trans in transqs])

        tags = [{'id': tag.id,
                 'label': tag.label,
                 'score': tag.nb,
                 'thumbnail': dbpediafields[tag.id].thumbnail if tag.id in dbpediafields else None,
                 'translated_label': translations.get(dbpediafields[tag.id].id, tag.label) if tag.id in dbpediafields else tag.label,
                 'url': tag.dbpedia_uri,
                 'wkpd_url': tag.wikipedia_url} for tag in tagqslist]

        countryqs = countryqs.annotate(nb=Count('includes__tag__taggedsheet'))
        countries = dict([(country.dbpedia_uri, country.nb) for country in countryqs])

        discqslist = list(discqs.annotate(nb=Count('taggedsheet')).order_by('-nb')[:10])

        if lang not in no_translate_langs:
            list_dbpediafields = [tag.dbpedia_fields for tag in discqslist if tag.dbpedia_fields is not None]
            transqs = DbpediaFieldsTranslation.objects.filter(master__in = list_dbpediafields, language_code = lang)
            translations = dict([(trans.master_id,trans.label) for trans in transqs])

        disciplines = [{'label':tag.label,'score':tag.nb, 'translated_label': translations.get(tag.dbpedia_fields.id, tag.label) if tag.dbpedia_fields is not None else tag.label} for tag in discqslist]

        # number of tagged sheets for each year covered by a tag period
        years = {}
        yearqs = yearqs.annotate(nb=Count('tag__taggedsheet'))
        for ty in yearqs:
            for year in range(ty.start_year, ty.end_year):
                years[year] = ty.nb + years.get(year, 0)

        # the sparkline only lists the years where the score changes
        yearchange = []
        for year in sorted(years.keys()):
            score = years[year]
            if year < 2011:
                if (year-1 not in years and score != 0) or (year-1 in years and years[year-1] != score):
                    yearchange.append({'year': year, 'score': score})
                if year+1 not in years and year != -1 and score != 0:
                    yearchange.append({'year': year+1, 'score': 0})

        tag_translations = {}
        for t in itertools.chain(labeltranslations,disciplines,tags):
            tag_translations[t['label']] = t['translated_label']
        for c in contenus:
            for t in c['tags']:
                tag_translations[t['label']] = t['translated_label']

        output = {'count': cont_count, 'contents': contenus, 'tags':tags, 'sparkline':yearchange, 'countries':countries, 'disciplines':disciplines, 'tagtranslations': tag_translations}
        outputstr = json.dumps(output)
        cache.set(cache_key, outputstr)

    return outputstr
    


def subtree(tree):
    '''
    Recursive helper used by filltree.

    Returns a node {'label': ...} for the given tree, with:
      - 'contents': the validated datasheets tagged with the label of the tree
        (tag order up to MAX_TAG_ORDER), sorted by decreasing score, when
        there are some;
      - 'themes': the nodes built from the items found under the 'contents'
        key of the input tree, keeping only the ones having contents or
        themes.
    '''
    MAX_TAG_ORDER = 16
    label = tree['label']
    children = tree.get('contents',[])

    datasheets = Datasheet.objects.filter(validated = True, taggedsheet__tag__label__iexact = label, taggedsheet__order__lte = MAX_TAG_ORDER).annotate(tagorder=Min('taggedsheet__order')).select_related('organisation').distinct()

    contents = []
    for ds in datasheets:
        contents.append({
            'description': ds.description,
            'title': ds.title,
            'url': ds.url,
            'score': int((MAX_TAG_ORDER - ds.tagorder)/2),
            'id': ds.id,
            'hda_id': ds.hda_id,
            'organization': ds.organisation.name,
            'organization_url': ds.organisation.website
        })
    # best scores first
    contents.sort(key=lambda e: -e['score'])

    node = { 'label': label }

    if contents:
        node['contents'] = contents

    if len(children):
        themes = []
        for child in children:
            child_node = subtree(child)
            # empty branches are not kept
            if len(child_node.get('contents',[])) or len(child_node.get('themes',[])):
                themes.append(child_node)
        node['themes'] = themes

    return node

def filltree(request):
    '''
    Fill the tree given (JSON encoded) in the "tree" GET parameter with the
    matching datasheets, see subtree, and return the result as JSON.

    An empty JSON object is returned when the tree is missing or has no
    'label' (subtree needs it and used to raise a KeyError on the default
    empty tree).
    '''
    tree = request.GET.get('tree','{}')

    treeobj = json.loads(tree)

    if isinstance(treeobj, dict) and 'label' in treeobj:
        res = subtree(treeobj)
    else:
        res = {}

    return HttpResponse(content=json.dumps(res), content_type='application/json')