# -*- coding: utf-8 -*-
# Provenance: src/hdalab/views/ajax.py as added by changeset 8f77cf71ab02
# (parent 73f19fa4f997), Tue Jun 17 10:25:33 2014 +0200.
'''
Created on Jan 31, 2012

Views returning JSON: tag translation, category trees, session storage,
tag/category search and filtering of datasheets.

@author: ymh
'''
from django.conf import settings
from django.core.cache import cache
from django.db.models import Q, Count, Min
from django.http import HttpResponse
from hdabo.models import Tag, Datasheet, TaggedSheet
from hdalab.models import HdaSession, Country, TagYears, DatasheetExtras
from hdalab.models.dataviz import DbpediaFieldsTranslation, DbpediaFields
from hdalab.models.categories import WpCategory, WpCategoryInclusion, TagWpCategory
from hdalab.utils import fix_cache_key
import copy
import django.utils.simplejson as json
import hmac
import itertools
import uuid


def _json_response(payload):
    '''Serialize *payload* to JSON and return it as an application/json response.'''
    return HttpResponse(content=json.dumps(payload), content_type='application/json')


def _translated_label(label, dbfields, translations):
    '''
    Return the entry of *translations* keyed by ``dbfields.id``.

    Falls back to *label* when *dbfields* is None or has no entry.
    '''
    if dbfields is None:
        return label
    return translations.get(dbfields.id, label)


def _session_key(sessionid):
    '''Return the hex HMAC of *sessionid* keyed with ``settings.SECRET_KEY``.'''
    return hmac.new(settings.SECRET_KEY, sessionid).hexdigest()


def tagtranslation(request):
    '''
    Translate a comma separated list of tag labels.

    GET parameters:
        lang   -- target language code (defaults to request.LANGUAGE_CODE)
        labels -- comma separated tag labels

    Returns a JSON object mapping each master label to its translated label.
    An empty object is returned when ``labels`` is missing or empty.
    '''
    lang = request.GET.get('lang', request.LANGUAGE_CODE)
    labels = request.GET.get('labels', None)

    if not labels:
        return _json_response({})

    masters = []
    for lbl in labels.split(","):
        # The slice keeps at most one tag per label; iterating it runs a
        # single query (the original evaluated it twice: len() then get()).
        labelqs = Tag.objects.select_related('dbpedia_fields').filter(
            ~Q(dbpedia_uri=None), label__iexact=lbl.strip())[0:1]
        for tag in labelqs:
            if tag.dbpedia_fields:
                masters.append(tag.dbpedia_fields)

    translationqs = DbpediaFieldsTranslation.objects.select_related(
        "master", "master__tag").filter(master__in=masters, language_code=lang)

    return _json_response(dict((t.master.label, t.label) for t in translationqs))


def subcat(category, globtags, level, max_level):
    '''
    Recursively build the tree node of *category* (helper of cattree).

    category  -- WpCategory to describe
    globtags  -- dict filled in place: tag label -> {'level', 'access'},
                 where 'access' is the node that receives the contents
                 attached to that tag
    level     -- depth of *category* in the tree
    max_level -- sub-categories are only expanded while level < max_level

    Returns a dict with the keys 'label', 'themes' and 'contents'.
    '''
    catlabel = category.label
    resobj = {
        'label': catlabel,
        'themes': [],
        'contents': []
    }

    seen = set()
    for tag in Tag.objects.filter(wp_categories__wp_category=category).distinct():
        label = tag.label
        if label in seen:
            # several tags may share one label; handle each label once
            continue
        seen.add(label)
        if label == catlabel:
            # the tag named like the category feeds the category node itself
            globtags[label] = {'level': level, 'access': resobj}
        else:
            tag_in_list = {'label': label, 'contents': []}
            resobj['themes'].append(tag_in_list)
            globtags[label] = {'level': (level + 1), 'access': tag_in_list}

    if level < max_level:
        subcats = WpCategory.objects.filter(parent_categories__parent_category=category)
        resobj['themes'] += [subcat(child, globtags, level + 1, max_level) for child in subcats]
    return resobj


def cleantags(category):
    '''
    Prune and sort a tree node in place, recursively.

    Empty 'contents' and 'themes' entries are removed, contents are sorted
    by decreasing 'score', themes by 'label', and themes left with neither
    'themes' nor 'contents' are dropped. Returns *category*.
    '''
    if 'contents' in category:
        if len(category['contents']) == 0:
            del category['contents']
        else:
            category['contents'] = sorted(category['contents'], key=lambda content: -content['score'])

    if 'themes' in category:
        themes = []
        for theme in category['themes']:
            clean_theme = cleantags(theme)
            if 'themes' in clean_theme or 'contents' in clean_theme:
                themes.append(clean_theme)
        if themes:
            category['themes'] = sorted(themes, key=lambda cat: cat['label'])
        else:
            del category['themes']
    return category


def cattree(request):
    '''
    Get the category tree from a label.

    GET parameters:
        label -- label of the root category (case-insensitive match)

    Returns the tree as JSON, or JSON ``null`` when ``label`` is missing or
    matches no category (the original raised AttributeError in both cases).
    '''
    ROOT_MAX_TAG_ORDER = 8
    MAX_TAG_ORDER = 8
    MAX_LEVEL = 3
    LEVEL_COEFF = 5

    label = request.GET.get('label', None)
    if not label:
        return _json_response(None)

    master_category = list(WpCategory.objects.filter(label__iexact=label)[0:1])
    if not master_category:
        return _json_response(None)

    lowerlabel = label.lower()
    globtags = {}
    resobj = subcat(master_category[0], globtags, 1, MAX_LEVEL)

    datasheets = Datasheet.objects.filter(
        validated=True,
        taggedsheet__tag__label__iexact=label,
        taggedsheet__order__lte=ROOT_MAX_TAG_ORDER).select_related('organisation').distinct()

    for datasheet in datasheets:
        # Calculating where we add the datasheet in the tree
        maintag = None
        maintagscore = -5
        dsscore = 0
        rootscore = 0
        taggedsheets = TaggedSheet.objects.select_related('tag', 'datasheet').filter(
            datasheet__id=datasheet.id, order__lte=MAX_TAG_ORDER)
        for ts in taggedsheets:
            taglabel = ts.tag.label
            if taglabel in globtags:
                # deeper tags and lower tag orders win
                score = LEVEL_COEFF * globtags[taglabel]['level'] - ts.order
                if score > maintagscore:
                    maintagscore = score
                    maintag = taglabel
                    dsscore = (MAX_TAG_ORDER - ts.order)
            if taglabel.lower() == lowerlabel:
                rootscore = (ROOT_MAX_TAG_ORDER - ts.order)
        if maintag is not None:
            globtags[maintag]['access']['contents'].append({
                'id': datasheet.id,
                'title': datasheet.title,
                'url': datasheet.url,
                'description': datasheet.description,
                'hda_id': datasheet.hda_id,
                'organization': datasheet.organisation.name,
                'organization_url': datasheet.organisation.website,
                'score': max(dsscore, rootscore)
            })

    cleantags(resobj)

    return _json_response(resobj)


def sessioninfo(request):
    '''
    Read or write the data stored in an HdaSession.

    GET parameters:
        data       -- JSON document to store (optional)
        sessionid  -- id to copy into the Django session (optional)
        sessionkey -- key to copy into the Django session (optional)

    Writing is allowed when the session key equals the HMAC of the session
    id, or when a new HdaSession has just been created. The JSON response
    holds 'data', 'write_allowed', 'sessionid' and, when writing is
    allowed, 'sessionkey'.
    '''
    data = json.loads(request.GET.get('data', "{}"))
    write = False

    for param in ('sessionid', 'sessionkey'):
        if param in request.GET:
            request.session[param] = request.GET[param]

    if 'sessionid' in request.session:
        sessionid = request.session['sessionid']

        if HdaSession.objects.filter(sessionid=sessionid).count() == 1:
            # NOTE(review): '==' is not a constant-time comparison; consider
            # a timing-safe compare if the deployed Python/Django offers one.
            if _session_key(sessionid) == request.session.get('sessionkey', None):
                write = True
        else:
            # unknown session id: forget it so that a new one is created below
            del request.session['sessionid']

    if 'sessionid' not in request.session:
        sessionid = unicode(uuid.uuid1())
        HdaSession.objects.create(sessionid=sessionid, data=json.dumps({}))
        write = True
        request.session['sessionid'] = sessionid
        request.session['sessionkey'] = _session_key(sessionid)

    if write and data:
        HdaSession.objects.filter(sessionid=sessionid).update(data=json.dumps(data))
    else:
        data = HdaSession.objects.get(sessionid=sessionid).data
        data = json.loads(data) if data else {}

    resobj = {'data': data, "write_allowed": write, "sessionid": sessionid}
    if write:
        resobj['sessionkey'] = request.session['sessionkey']

    return _json_response(resobj)


def tagsearch(request):
    '''
    Search tags by (translated) label.

    GET parameters:
        term  -- text to search; when missing or empty, tags having a
                 dbpedia uri are listed without any text filter
        count -- maximum number of tags fetched (default 40)
        lang  -- language code (defaults to request.LANGUAGE_CODE)

    Returns a JSON list of objects with the keys 'original_label', 'nb',
    'thumbnail', 'value' and 'abstract', ordered by decreasing 'nb'.
    '''
    q = request.GET.get('term', None)
    maxcount = int(request.GET.get('count', '40'))
    lang = request.GET.get('lang', request.LANGUAGE_CODE)

    stemming_langs = ['fr', 'en', 'de', 'it']
    # For Japanese, there are no word boundaries, we should not use the regexp in that case
    no_translate_langs = ['fr']

    if q:
        lq = q.lower()
        qs = Tag.objects.select_related('dbpedia_fields').filter(datasheet__validated=True)
        # NOTE(review): the term is interpolated unescaped into the pattern,
        # so regex metacharacters typed by the user reach the database.
        qrx = '(\\m|\\b)%s' % q
        if lang in no_translate_langs:
            if lang in stemming_langs:
                qs = qs.filter(label__iregex=qrx)
            else:
                qs = qs.filter(label__icontains=q)
        else:
            # the three conditions stay in one filter() call so that they
            # apply to the same translation row
            if lang in stemming_langs:
                qs = qs.filter(
                    dbpedia_fields__translations__label__iregex=qrx,
                    dbpedia_fields__translations__language_code=lang,
                    dbpedia_fields__translations__is_label_translated=True)
            else:
                qs = qs.filter(
                    dbpedia_fields__translations__label__icontains=q,
                    dbpedia_fields__translations__language_code=lang,
                    dbpedia_fields__translations__is_label_translated=True)
    else:
        lq = None
        qs = Tag.objects.filter(~Q(dbpedia_uri=None))

    qs = qs.annotate(nb=Count('datasheet', distinct=True)).order_by('-nb')[:maxcount]

    qslist = list(qs)

    if lang in no_translate_langs:
        translations = {}
    else:
        transqs = DbpediaFieldsTranslation.objects.filter(
            master__tag__in=qslist, language_code=lang,
            is_label_translated=True).select_related("master")
        translations = dict(
            (tr.master.tag_id,
             {'label': tr.label, 'abstract': tr.abstract, 'is_label_translated': tr.is_label_translated})
            for tr in transqs)

    res = []

    for t in qslist:
        dbfields = t.dbpedia_fields
        resobj = {'original_label': t.label, 'nb': t.nb}
        resobj['thumbnail'] = dbfields.thumbnail if dbfields is not None else None

        if t.id in translations:
            resobj['value'] = translations[t.id]['label']
            resobj['abstract'] = translations[t.id]['abstract']
        else:
            resobj['value'] = t.label
            resobj['abstract'] = dbfields.abstract if dbfields is not None else None
        # 'not q' also covers an empty term: the original tested 'q is None'
        # and then hit an unbound 'lq' (NameError) for term=''.
        if not q or lq in resobj['value'].lower():
            res.append(resobj)

    return _json_response(res)


def catsearch(request):
    '''
    Search the categories that are also tags.

    GET parameters:
        term -- text to search at the start of a word of the label

    Returns a JSON list of {'value': <category label>}; the list is empty
    when ``term`` is missing.
    '''
    q = request.GET.get('term', None)
    if q is None:
        return _json_response([])

    # Only keep the categories that are also tags.
    # The original built this pattern but passed the raw term to the lookup.
    qrx = '(\\m|\\b)%s' % q
    labels = [tag.label for tag in Tag.objects.filter(label__iregex=qrx)]

    qs = WpCategory.objects.annotate(
        nb=Count('child_categories__child_category__tags')).filter(label__in=labels, nb__gt=0)

    return _json_response([{'value': cat.label} for cat in qs])


# NOTE(review): this view shadows the builtin filter() for the whole module;
# the name is kept because callers outside this file may reference it.
def filter(request):
    '''
    Return the datasheets and facets matching the requested criteria.

    GET parameters:
        lang         -- language code (defaults to request.LANGUAGE_CODE)
        period       -- "start" or "start,end" years
        label        -- comma separated tag labels, all required
        country      -- comma separated country dbpedia uris
        contentlist  -- comma separated datasheet ids
        mto          -- maximum tag order taken into account (default 12)
        contentcount -- maximum number of datasheets returned (default 12)
        tagcount     -- maximum number of tags returned (default 30)

    The JSON output holds 'count', 'contents', 'tags', 'sparkline',
    'countries', 'disciplines' and 'tagtranslations'. The serialized output
    is cached under a key built from all the parameters.
    '''
    lang = request.GET.get('lang', request.LANGUAGE_CODE)
    periode = request.GET.get('period', None)
    label = request.GET.get('label', None)
    country = request.GET.get('country', None)
    contentlist = request.GET.get('contentlist', None)
    max_tag_order = int(request.GET.get('mto', '12'))
    # converted like 'mto': both values are used as slice bounds below
    content_count = int(request.GET.get('contentcount', 12))
    tag_count = int(request.GET.get('tagcount', 30))
    no_translate_langs = ['fr']

    key_parts = ("filter", lang, periode, label, country, contentlist, max_tag_order, content_count, tag_count)
    key_parts = [unicode(p).encode("utf-8") for p in key_parts]

    cache_key = fix_cache_key("-".join(key_parts))

    outputstr = cache.get(cache_key)

    if outputstr is None:

        matchtagids = []

        tagqs = Tag.objects.exclude(
            category__label__in=['Datation', 'Localisation', 'Discipline artistique']
        ).filter(~Q(dbpedia_uri=None))
        countryqs = Country.objects
        discqs = Tag.objects.filter(
            ~Q(dbpedia_uri=None), category__label=u'Discipline artistique'
        ).select_related('dbpedia_fields')
        yearqs = TagYears.objects

        contentqs = Datasheet.objects.filter(validated=True)
        labeltranslations = []

        if label or periode or country or contentlist:
            # one queryset of matching tags per criterion
            matchtagqslist = []

            if periode:
                bounds = periode.split(",")
                start_year = int(bounds[0])
                end_year = int(bounds[0:2][-1])  # same as start when one year is given
                delta = max(1, (end_year - start_year) // 2)
                minstart = start_year - delta
                maxend = end_year + delta
                matchtagqs = Tag.objects.filter(
                    ~Q(dbpedia_uri=None),
                    years__end_year__gte=start_year,
                    years__start_year__lte=end_year,
                    years__end_year__lte=maxend,
                    years__start_year__gte=minstart,
                )
                matchtagqslist.append(matchtagqs)

            if label:
                for txtlbl in label.split(","):
                    matchtagqs = Tag.objects.select_related('dbpedia_fields').filter(
                        ~Q(dbpedia_uri=None), label__iexact=txtlbl.strip())
                    matchtagqslist.append(matchtagqs)

            if country:
                for country_uri in country.split(","):
                    matchtagqs = Tag.objects.filter(
                        ~Q(dbpedia_uri=None), locatedin__country__dbpedia_uri=country_uri)
                    matchtagqslist.append(matchtagqs)

            if contentlist:
                contentqs = contentqs.filter(id__in=contentlist.split(","))

            # A datasheet must satisfy every criterion (AND); a tag is a
            # "matching" tag when it belongs to any of them (OR).
            tagcond = None
            tagcondid = None
            for matchtagqs in matchtagqslist:
                newcond = Q(id__in=TaggedSheet.objects.filter(
                    tag__in=copy.deepcopy(matchtagqs), order__lte=max_tag_order).values('datasheet_id'))
                newcondid = Q(id__in=matchtagqs)
                tagcond = newcond if tagcond is None else (tagcond & newcond)
                tagcondid = newcondid if tagcondid is None else (tagcondid | newcondid)

            matchtagidsqs = []
            # With only 'contentlist' given there is no tag criterion; the
            # original then called filter(None).
            if tagcond is not None:
                contentqs = contentqs.filter(tagcond).distinct()
                matchtagidsqs = list(
                    Tag.objects.select_related("dbpedia_fields").only("id").filter(tagcondid))
            matchtagids = [t.id for t in matchtagidsqs]

            if lang not in no_translate_langs:
                masters = [t.dbpedia_fields for t in matchtagidsqs if t.dbpedia_fields is not None]

                translationqs = DbpediaFieldsTranslation.objects.select_related(
                    "master", "master__tag").filter(master__in=masters, language_code=lang)
                labeltranslations = [
                    {'label': t.master.label, 'translated_label': t.label} for t in translationqs]

            # narrow every facet to the selected datasheets
            tagqs = tagqs.filter(datasheet__in=contentqs)
            countryqs = countryqs.filter(includes__tag__taggedsheet__datasheet__in=contentqs)
            discqs = discqs.filter(datasheet__in=contentqs)
            yearqs = yearqs.filter(tag__taggedsheet__datasheet__in=contentqs)

        # NOTE(review): when 'contentlist' is None the original called
        # contentqs.order_by('?') here and discarded the returned queryset,
        # so no random ordering was ever applied. It is left unapplied:
        # assigning it changes the output and may conflict with distinct().

        cont_count = contentqs.count()

        contenus = dict(
            (content.id,
             {'score': 0, 'tags': [], 'hda_id': content.hda_id, 'id': content.id,
              'title': content.title, 'description': content.description, 'url': content.url})
            for content in contentqs[0:content_count])
        contentids = list(contenus.keys())

        qs = DatasheetExtras.objects.select_related('insee').filter(datasheet__in=contentids)
        for dse in qs:
            contenus[dse.datasheet_id]['coords'] = {
                'city_name': dse.insee.city_name,
                'latitude': dse.insee.latitude,
                'longitude': dse.insee.longitude}

        qs = list(TaggedSheet.objects.select_related('tag', 'tag__dbpedia_fields').filter(
            datasheet__in=contentids, order__lte=max_tag_order).order_by('order'))

        translations = {}

        if lang not in no_translate_langs:
            transqs = DbpediaFieldsTranslation.objects.filter(
                master__in=[ts.tag.dbpedia_fields for ts in qs], language_code=lang)
            translations = dict((trans.master_id, trans.label) for trans in transqs)

        for ts in qs:
            match_tag = ts.tag.id in matchtagids
            contenus[ts.datasheet_id]['tags'].append({
                'id': ts.tag.id,
                'label': ts.tag.label,
                'order': ts.order,
                'match': match_tag,
                'translated_label': _translated_label(ts.tag.label, ts.tag.dbpedia_fields, translations)})

            if match_tag:
                contenus[ts.datasheet_id]['score'] += 2 * max_tag_order - ts.order

        if contentlist is None:
            contenus = sorted(contenus.values(), key=lambda e: -e['score'])
        else:
            contenus = list(contenus.values())

        tagqs = tagqs.annotate(nb=Count('datasheet')).order_by('-nb').only('id', 'label')[:tag_count]
        # hack to add only necessary fields in the group by
        # workaround for bug https://code.djangoproject.com/ticket/17144
        tagqs.query.clear_select_fields()
        tagqs.query.add_fields(['id', 'label'], False)
        tagqs.query.set_group_by()

        tagqslist = list(tagqs)

        dbpediafields = dict((df.tag_id, df) for df in DbpediaFields.objects.filter(tag__in=tagqslist))

        if lang not in no_translate_langs:
            transqs = DbpediaFieldsTranslation.objects.filter(
                master__in=dbpediafields.values(), language_code=lang)
            translations = dict((trans.master_id, trans.label) for trans in transqs)

        tags = [
            {'id': tag.id, 'label': tag.label, 'score': tag.nb,
             'translated_label': _translated_label(tag.label, dbpediafields.get(tag.id), translations)}
            for tag in tagqslist]

        countryqs = countryqs.annotate(nb=Count('includes__tag__taggedsheet'))
        countries = dict((ctry.dbpedia_uri, ctry.nb) for ctry in countryqs)

        discqslist = list(discqs.annotate(nb=Count('taggedsheet')).order_by('-nb')[:10])

        if lang not in no_translate_langs:
            transqs = DbpediaFieldsTranslation.objects.filter(
                master__in=[tag.dbpedia_fields for tag in discqslist], language_code=lang)
            translations = dict((trans.master_id, trans.label) for trans in transqs)

        disciplines = [
            {'label': tag.label, 'score': tag.nb,
             'translated_label': _translated_label(tag.label, tag.dbpedia_fields, translations)}
            for tag in discqslist]

        # number of tagged sheets per year, summed over every TagYears range
        years = {}
        yearqs = yearqs.annotate(nb=Count('tag__taggedsheet'))
        for ty in yearqs:
            for year in range(ty.start_year, ty.end_year):
                years[year] = ty.nb + years.get(year, 0)

        # keep only the years where the score changes
        yearchange = []
        for year in sorted(years.keys()):
            score = years[year]
            if year < 2011:
                if (year - 1 not in years and score != 0) or (year - 1 in years and years[year - 1] != score):
                    yearchange.append({'year': year, 'score': score})
                if year + 1 not in years and year != -1 and score != 0:
                    yearchange.append({'year': year + 1, 'score': 0})

        tag_translations = {}
        for t in itertools.chain(labeltranslations, disciplines, tags):
            tag_translations[t['label']] = t['translated_label']
        for c in contenus:
            for t in c['tags']:
                tag_translations[t['label']] = t['translated_label']

        output = {
            'count': cont_count,
            'contents': contenus,
            'tags': tags,
            'sparkline': yearchange,
            'countries': countries,
            'disciplines': disciplines,
            'tagtranslations': tag_translations}
        outputstr = json.dumps(output)
        cache.set(cache_key, outputstr)

    return HttpResponse(content=outputstr, content_type='application/json')


def subtree(tree):
    '''
    Recursively fill a tree node with the datasheets tagged with its label.

    tree -- dict with a 'label' key; the items of its optional 'contents'
            key are processed as sub-trees

    Returns a dict with 'label', plus 'contents' (datasheets sorted by
    decreasing score) when some were found, plus 'themes' (the non-empty
    sub-trees) when *tree* had sub-trees.
    '''
    MAX_TAG_ORDER = 16
    label = tree['label']
    sub = tree.get('contents', [])

    datasheets = Datasheet.objects.filter(
        validated=True,
        taggedsheet__tag__label__iexact=label,
        taggedsheet__order__lte=MAX_TAG_ORDER
    ).annotate(tagorder=Min('taggedsheet__order')).select_related('organisation').distinct()

    contents = [{
        'description': ds.description,
        'title': ds.title,
        'url': ds.url,
        'score': int((MAX_TAG_ORDER - ds.tagorder) // 2),
        'id': ds.id,
        'hda_id': ds.hda_id,
        'organization': ds.organisation.name,
        'organization_url': ds.organisation.website
    } for ds in datasheets]

    contents = sorted(contents, key=lambda e: -e['score'])

    res = {'label': label}

    if contents:
        res['contents'] = contents

    if len(sub):
        subcats = [subtree(st) for st in sub]
        res['themes'] = [sc for sc in subcats if sc.get('contents') or sc.get('themes')]

    return res


def filltree(request):
    '''
    Fill the tree given as JSON in the ``tree`` GET parameter (see subtree).

    Returns the filled tree as JSON.
    '''
    treeobj = json.loads(request.GET.get('tree', '{}'))

    return _json_response(subtree(treeobj))