# -*- coding: utf-8 -*-
'''
Created on Jan 31, 2012
@author: ymh
'''
from django.conf import settings
from django.core.cache import cache
from django.db.models import Q, Count
from django.http import HttpResponse
from hdabo.models import Tag, Datasheet, TaggedSheet
from hdalab.models import HdaSession, Country, TagYears, DatasheetExtras
from hdalab.models.dataviz import DbpediaFieldsTranslation, DbpediaFields
from hdalab.models.categories import WpCategory, WpCategoryInclusion, TagWpCategory
from hdalab.utils import fix_cache_key
import copy
import django.utils.simplejson as json
import hmac
import itertools
import uuid
def taginfo(request):
    """
    Return JSON information about a tag identified by the 'label' GET
    parameter: number of tagged datasheets, dbpedia data, translations in
    the request language and a best-effort wikipedia url.
    """
    label = request.GET.get('label', None)
    resobj = {'requested_label' : label}
    # Number of distinct validated-or-not datasheets carrying this tag.
    resobj["content_count"] = Datasheet.objects.filter(taggedsheet__tag__label__iexact = label).distinct().count()
    # Prefer a tag that has a dbpedia uri ('-dbpedia_uri' sorts non-null first).
    res = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = label).order_by('-dbpedia_uri')[0:1]
    if len(res) == 1:
        restag = res.get()
        resobj["dbpedia_uri"] = restag.dbpedia_uri
        if resobj["dbpedia_uri"] is not None and restag.dbpedia_fields is not None:
            dbfield = restag.dbpedia_fields
            resobj["abstract"] = dbfield.abstract
            resobj["dbpedia_label"] = dbfield.label
            resobj["thumbnail"] = dbfield.thumbnail
            # Translation in the request language, falling back to the
            # untranslated dbpedia fields.
            transqs = DbpediaFieldsTranslation.objects.filter(master=dbfield, language_code=request.LANGUAGE_CODE)[0:1]
            if transqs:
                trans = transqs.get()
                resobj['translated_abstract'] = trans.abstract
                resobj['translated_label'] = trans.label
            else:
                resobj['translated_abstract'] = dbfield.abstract
                resobj['translated_label'] = dbfield.label
    # Build the wikipedia url from the translated label when available.
    wikipedia_label = resobj.get('translated_label', label)
    # BUGFIX: a missing or empty 'label' parameter previously raised a
    # TypeError/IndexError here; only build the url when we have a label.
    if wikipedia_label:
        wikipedia_label = wikipedia_label[0].capitalize() + wikipedia_label[1:]
        resobj["wikipedia_url"] = "http://%s.wikipedia.org/wiki/%s" % (request.LANGUAGE_CODE,wikipedia_label.replace(' ', '_'))
    return HttpResponse(content=json.dumps(resobj), mimetype='application/json')
def tagtranslation(request):
    """
    Translate a comma separated list of tag labels (the 'labels' GET
    parameter) into the request language via the dbpedia field translations.
    Returns a JSON object mapping original label to translated label.
    """
    labels = request.GET.get('labels',None)
    if not labels:
        return HttpResponse(content=json.dumps({}), mimetype='application/json')
    masters = []
    for raw_label in labels.split(","):
        # Only consider tags carrying a dbpedia uri; take the first match.
        candidates = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = raw_label.strip())[0:1]
        if len(candidates) > 0:
            fields = candidates.get().dbpedia_fields
            if fields:
                masters.append(fields)
    translationqs = DbpediaFieldsTranslation.objects.select_related("master", "master__tag").filter(master__in = masters, language_code=request.LANGUAGE_CODE)
    translations = dict((t.master.label, t.label) for t in translationqs)
    return HttpResponse(content=json.dumps(translations), mimetype='application/json')
def subcat(category, globtags, level, max_level ):
    """
    Recursive helper for cattree: build the tree node for *category*.

    Registers every tag label of the category in *globtags* with its depth
    and a reference ('access') to the dict whose 'contents' list will later
    receive the datasheets. Recurses into sub-categories until *max_level*.
    Returns the node dict for *category*.
    """
    tags = Tag.objects.select_related('wp_categories__wp_category').filter(wp_categories__wp_category = category)
    # Idiom fix: deduplicate labels with a set instead of a throwaway dict.
    taglabels = set(t.label for t in tags)
    catlabel = category.label
    resobj = {
        'category': catlabel,
        'tags': [],
        'contents': []
    }
    for label in taglabels:
        if label == catlabel:
            # The tag named like the category attaches contents directly
            # to the category node itself.
            globtags[label] = {'level': level, 'access': resobj }
        else:
            tag_in_list = {'label' : label, 'contents': []}
            resobj['tags'].append(tag_in_list)
            globtags[label] = {'level': (level + 1), 'access': tag_in_list }
    if level < max_level:
        subcats = WpCategory.objects.select_related('parent_categories__parent_category').filter(parent_categories__parent_category = category)
        # Idiom fix: iterate the queryset directly instead of range(len(...)).
        resobj['sub_categories'] = [subcat(sc, globtags, level + 1, max_level ) for sc in subcats]
    return resobj
def cleantags(category):
    """
    Recursively prune empty 'contents', 'tags' and 'sub_categories' entries
    from a category tree built by subcat.

    Mutates *category* in place and returns it. A sub-category is kept only
    if, after cleaning, it still has tags, sub-categories or contents.
    """
    # Idiom fix: dict.has_key() is deprecated (removed in Python 3); use 'in'.
    if 'contents' in category and len(category['contents']) == 0:
        del category['contents']
    if 'tags' in category:
        # Keep only tags that actually carry contents.
        category['tags'] = [tag for tag in category['tags'] if len(tag['contents'])]
        if len(category['tags']) == 0:
            del category['tags']
    if 'sub_categories' in category:
        sub_cats = []
        for sub_cat in category['sub_categories']:
            cat = cleantags(sub_cat)
            if 'tags' in cat or 'sub_categories' in cat or 'contents' in cat:
                sub_cats.append(cat)
        category['sub_categories'] = sub_cats
        if len(category['sub_categories']) == 0:
            del category['sub_categories']
    return category
def cattree(request):
    """
    Return a JSON category tree rooted at the category named by the 'label'
    GET parameter. Each validated datasheet tagged with one of the tree's
    tags is attached to its best-scoring tag node; empty nodes are pruned.
    """
    MAX_TAG_ORDER = 5  # ignore tags beyond this position on a datasheet
    MAX_LEVEL = 3      # maximum category depth explored by subcat
    label = request.GET.get('label', None)
    globtags = {}
    resobj = None
    master_category = WpCategory.objects.filter(label__iexact=label)[0:1]
    if len(master_category):
        resobj = subcat(master_category[0], globtags, 1, MAX_LEVEL )
        tag_list = list(globtags)
        if len(tag_list):
            datasheets = Datasheet.objects.select_related('taggedsheet__tag').filter(validated = True, taggedsheet__tag__label__in = tag_list, taggedsheet__order__lte = MAX_TAG_ORDER).distinct()
            for datasheet in datasheets:
                # Pick the attachment point: deeper tags score higher, earlier
                # (lower order) tags score higher.
                maintag = None
                maintagscore = -5
                for ts in TaggedSheet.objects.select_related('tag','datasheet').filter(datasheet__id=datasheet.id,order__lte=MAX_TAG_ORDER):
                    ts_label = ts.tag.label
                    # Idiom fix: has_key() is deprecated; also renamed the
                    # loop variable so it no longer shadows the outer 'label'.
                    if ts_label in globtags:
                        score = 3 * globtags[ts_label]['level'] - ts.order
                        if score > maintagscore:
                            maintagscore = score
                            maintag = ts_label
                if maintag is not None:
                    globtags[maintag]['access']['contents'].append({'id': datasheet.id, 'title': datasheet.title, 'url': datasheet.url})
            cleantags(resobj)
    return HttpResponse(content=json.dumps(resobj), mimetype='application/json')
def sessioninfo(request):
    """
    Read or write the HdaSession blob attached to the client.

    The client proves write access with an HMAC of its sessionid keyed on
    SECRET_KEY. A session may be created on the fly; its id and key are
    returned in the JSON response. The optional 'data' GET parameter (JSON)
    replaces the stored blob when write access is granted.
    """
    data = json.loads(request.GET.get('data', "{}"))
    write = False
    # Allow the client to pass its credentials explicitly in the query string.
    if 'sessionid' in request.GET:
        request.session['sessionid'] = request.GET['sessionid']
    if 'sessionkey' in request.GET:
        request.session['sessionkey'] = request.GET['sessionkey']
    if 'sessionid' in request.session:
        sessionid = request.session['sessionid']
        if HdaSession.objects.filter(sessionid=sessionid).count() == 1:
            sessionkey = request.session.get('sessionkey',None)
            hm = hmac.new(settings.SECRET_KEY, sessionid)
            if hm.hexdigest() == sessionkey:
                write = True
            else:
                # Wrong key: drop the id so a fresh session is created below.
                del request.session['sessionid']
        else:
            # BUGFIX: the stored sessionid matches no database row (or
            # several). Previously the stale id was kept, the creation
            # branch below was skipped, and HdaSession.objects.get() raised
            # DoesNotExist. Drop it so a new session is created instead.
            del request.session['sessionid']
    if 'sessionid' not in request.session:
        sessionid = unicode(uuid.uuid1())
        HdaSession.objects.create(sessionid=sessionid, data=json.dumps({}))
        write = True
        request.session['sessionid'] = sessionid
        request.session['sessionkey'] = hmac.new(settings.SECRET_KEY, sessionid).hexdigest()
    if write and data:
        HdaSession.objects.filter(sessionid=sessionid).update(data=json.dumps(data))
    else:
        data = HdaSession.objects.get(sessionid=sessionid).data
        data = json.loads(data) if data else {}
    resobj = {'data': data, "write_allowed" : write, "sessionid": sessionid }
    if write:
        resobj['sessionkey'] = request.session['sessionkey']
    return HttpResponse(content=json.dumps(resobj), mimetype='application/json')
def tagsearch(request):
    """
    Autocomplete search over tags ('term' GET parameter). Matches either the
    raw tag label or its translation in the request language; only tags with
    a dbpedia uri and at least one validated datasheet are considered.
    Returns a JSON list of {value, nb, label} sorted by datasheet count.
    """
    q = request.GET.get('term',None)
    # BUGFIX: res was previously defined only inside the 'if q:' branch, so a
    # missing 'term' parameter raised NameError at the return statement.
    res = []
    if q:
        lq = q.lower()
        # The original dead 'if q else ...' ternary was removed: q is always
        # truthy in this branch.
        qs = Tag.objects.filter(datasheet__validated=True).filter( Q(label__icontains = q ) | Q(dbpedia_fields__translations__label__icontains = q, dbpedia_fields__translations__language_code=request.LANGUAGE_CODE), ~Q(dbpedia_uri = None))
        qs = qs.annotate(nb=Count('datasheet')).order_by('-nb')[:20]
        qslist = list(qs)
        transqs = DbpediaFieldsTranslation.objects.filter(master__tag__in = qslist, language_code=request.LANGUAGE_CODE).select_related("master")
        translations = dict((tr.master.tag_id, tr.label) for tr in transqs)
        for t in qslist:
            resobj = {'value':t.label,'nb':t.nb}
            resobj['label'] = translations.get(t.id, t.label)
            # Keep only results whose displayed label actually contains the query.
            if resobj['label'].lower().find(lq) != -1:
                res.append(resobj)
    return HttpResponse(content=json.dumps(res), mimetype='application/json')
def catsearch(request):
    """
    Autocomplete search over wikipedia categories ('term' GET parameter).
    Matches categories whose label starts with the term or contains it as a
    word, restricted to categories with at least one visible tag.
    Returns a JSON list of {value} objects.
    """
    q = request.GET.get('term',None)
    res = []
    # BUGFIX: a missing 'term' parameter previously raised TypeError on
    # the ' ' + q concatenation below.
    if q:
        qs = WpCategory.objects.filter(tags__hidden = False).distinct().filter(Q(label__icontains = ' ' + q ) | Q(label__istartswith = q))
        res = [{'value':t.label} for t in qs]
    return HttpResponse(content=json.dumps(res), mimetype='application/json')
def filter(request):
    """
    Main faceted-search view, returning a JSON object with matching contents,
    facet tags, countries, artistic disciplines, a year sparkline and label
    translations. Responses are cached per parameter combination.

    GET parameters:
      period       -- "start[,end]" year range
      label        -- comma separated tag labels (AND-combined)
      country      -- comma separated country dbpedia uris (OR-combined)
      contentlist  -- comma separated datasheet ids to restrict to
      mto          -- max tag order considered on a datasheet (default 12)
      contentcount -- number of contents returned (default 12)
      tagcount     -- number of facet tags returned (default 30)

    NOTE: this view shadows the builtin filter(); the name is kept because
    callers (url configuration) reference it.
    """
    periode = request.GET.get('period',None)
    label = request.GET.get('label', None)
    country = request.GET.get('country', None)
    contentlist = request.GET.get('contentlist', None)
    # ROBUSTNESS: GET values are strings; coerce the numeric parameters once
    # so they can safely be used as queryset slice bounds below.
    max_tag_order = int(request.GET.get('mto', 12))
    content_count = int(request.GET.get('contentcount', 12))
    tag_count = int(request.GET.get('tagcount', 30))
    # Cache key built from every parameter that influences the response.
    key_parts = ("filter",request.LANGUAGE_CODE,periode,label,country,contentlist,max_tag_order,content_count,tag_count)
    key_parts = [unicode(p).encode("utf-8") for p in key_parts]
    cache_key = fix_cache_key("-".join(key_parts))
    outputstr = cache.get(cache_key)
    if outputstr is None:
        matchtagids = []
        # Base querysets for each facet, progressively narrowed below.
        tagqs = Tag.objects.exclude(category__label__in = ['Datation', 'Localisation', 'Discipline artistique']).filter(~Q(dbpedia_uri = None))
        countryqs = Country.objects
        discqs = Tag.objects.filter(~Q(dbpedia_uri = None), category__label = u'Discipline artistique').select_related('dbpedia_fields')
        yearqs = TagYears.objects
        contentqs = Datasheet.objects.filter(validated=True)
        labeltranslations = []
        if label or periode or country or contentlist :
            matchtagqslist = []
            if periode:
                # Tags whose year span overlaps the requested period, with a
                # tolerance of half the period length on each side.
                years = periode.split(",")
                start_year = int(years[0])
                end_year = int(years[0:2][-1])
                delta = max(1, (end_year-start_year)/2)
                minstart = start_year - delta
                maxend = end_year + delta
                matchtagqs = Tag.objects.filter(~Q(dbpedia_uri = None),
                                                years__end_year__gte = start_year,
                                                years__start_year__lte = end_year,
                                                years__end_year__lte = maxend,
                                                years__start_year__gte = minstart,
                                                )
                matchtagqslist.append(matchtagqs)
            if label:
                # One queryset per label: labels are AND-combined on contents.
                for txtlbl in label.split(","):
                    matchtagqs = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = txtlbl.strip())
                    matchtagqslist.append(matchtagqs)
            if country:
                for country_uri in country.split(","):
                    matchtagqs = Tag.objects.filter(~Q(dbpedia_uri = None),locatedin__country__dbpedia_uri = country_uri)
                    matchtagids += [t.id for t in matchtagqs if t.id not in matchtagids]
                    matchtagqslist.append(matchtagqs)
            if contentlist:
                contentqs = contentqs.filter(id__in = contentlist.split(","))
            # Combine: contents must match every tag queryset (AND), while the
            # set of matching tag ids is the union (OR).
            tagcond = None
            tagcondid = None
            for matchtagqs in matchtagqslist:
                newcond = Q(id__in = TaggedSheet.objects.filter(tag__in = copy.deepcopy(matchtagqs), order__lte = max_tag_order).values('datasheet_id'))
                newcondid = Q(id__in = matchtagqs)
                tagcond = newcond if tagcond is None else (tagcond & newcond)
                tagcondid = newcondid if tagcondid is None else (tagcondid | newcondid)
            contentqs = contentqs.filter(tagcond).distinct()
            matchtagidsqs = list(Tag.objects.select_related("dbpedia_fields").only("id").filter(tagcondid))
            matchtagids = [t.id for t in matchtagidsqs]
            masters = [t.dbpedia_fields for t in matchtagidsqs if t.dbpedia_fields is not None]
            translationqs = DbpediaFieldsTranslation.objects.select_related("master", "master__tag").filter(master__in = masters, language_code=request.LANGUAGE_CODE)
            labeltranslations = [{'label':t.master.label, 'translated_label':t.label} for t in translationqs]
            # Narrow every facet to the filtered contents.
            tagqs = tagqs.filter(datasheet__in = contentqs)
            countryqs = countryqs.filter(includes__tag__taggedsheet__datasheet__in = contentqs)
            discqs = discqs.filter(datasheet__in = contentqs)
            yearqs = yearqs.filter(tag__taggedsheet__datasheet__in = contentqs)
        if contentlist is None:
            # BUGFIX: order_by() returns a *new* queryset; the previous code
            # discarded the result, so the random ordering was never applied.
            contentqs = contentqs.order_by('?')
        cont_count = contentqs.count()
        contenus = dict([(content.id, {'score' : 0, 'tags' : [], 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}) for content in contentqs[0:content_count]])
        contentids = contenus.keys()
        # Attach geographic coordinates where available.
        qs = DatasheetExtras.objects.select_related('insee').filter(datasheet__in = contentids)
        for dse in qs:
            contenus[dse.datasheet_id]['coords'] = {'city_name': dse.insee.city_name, 'latitude': dse.insee.latitude, 'longitude': dse.insee.longitude}
        # Attach tags (with translations) and score contents by matched tags.
        qs = list(TaggedSheet.objects.select_related('tag', 'tag__dbpedia_fields').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order'))
        transqs = DbpediaFieldsTranslation.objects.filter(master__in = [ts.tag.dbpedia_fields for ts in qs], language_code = request.LANGUAGE_CODE)
        translations = dict([(trans.master_id,trans.label) for trans in transqs])
        for ts in qs:
            match_tag = ts.tag.id in matchtagids
            contenus[ts.datasheet_id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag , 'translated_label': translations.get(ts.tag.dbpedia_fields.id, ts.tag.label) if ts.tag.dbpedia_fields is not None else ts.tag.label})
            if match_tag:
                # Matching tags near the front of the sheet score higher.
                contenus[ts.datasheet_id]['score'] += 2*max_tag_order - ts.order
        if contentlist is None:
            contenus = sorted(contenus.values(),key=lambda e: -e['score'])
        else:
            contenus = contenus.values()
        # Facet tags, ranked by datasheet count.
        tagqs = tagqs.annotate(nb=Count('datasheet')).order_by('-nb').only('id','label')[:tag_count]
        # Hack: keep only the necessary fields in the GROUP BY clause;
        # workaround for https://code.djangoproject.com/ticket/17144
        tagqs.query.clear_select_fields()
        tagqs.query.add_fields(['id','label'], False)
        tagqs.query.set_group_by()
        tagqslist = list(tagqs)
        dbpediafields = dict([(df.tag_id, df) for df in DbpediaFields.objects.filter(tag__in = tagqslist)])
        transqs = DbpediaFieldsTranslation.objects.filter(master__in = dbpediafields.values(), language_code = request.LANGUAGE_CODE)
        translations = dict([(trans.master_id,trans.label) for trans in transqs])
        tags = [{'id': tag.id, 'label': tag.label, 'score': tag.nb, 'translated_label': translations.get(dbpediafields[tag.id].id, tag.label) if tag.id in dbpediafields else tag.label} for tag in tagqslist]
        # Country facet: number of tagged sheets per country.
        countryqs = countryqs.annotate(nb=Count('includes__tag__taggedsheet'))
        countries = dict([(country.dbpedia_uri, country.nb) for country in countryqs])
        # Discipline facet, top 10, with translations.
        discqslist = list(discqs.annotate(nb=Count('taggedsheet')).order_by('-nb')[:10])
        transqs = DbpediaFieldsTranslation.objects.filter(master__in = [tag.dbpedia_fields for tag in discqslist], language_code = request.LANGUAGE_CODE)
        translations = dict([(trans.master_id,trans.label) for trans in transqs])
        disciplines = [{'label':tag.label,'score':tag.nb, 'translated_label': translations.get(tag.dbpedia_fields.id, tag.label) if tag.dbpedia_fields is not None else tag.label} for tag in discqslist]
        # Year sparkline: accumulate counts per year, then emit only the
        # years where the score changes (compact run-length encoding).
        years = {}
        yearqs = yearqs.annotate(nb=Count('tag__taggedsheet'))
        for ty in yearqs:
            for year in range(ty.start_year, ty.end_year):
                years[year] = ty.nb + (years[year] if year in years else 0)
        yearchange = []
        for year in sorted(years.keys()):
            score = years[year]
            if year < 2011:
                if (year-1 not in years and score != 0) or (year-1 in years and years[year-1] != score):
                    yearchange.append({'year': year, 'score': score})
                if year+1 not in years and year != -1 and score != 0:
                    yearchange.append({'year': year+1, 'score': 0})
        # Flat label -> translated label map for everything returned.
        tag_translations = {}
        for t in itertools.chain(labeltranslations,disciplines,tags):
            tag_translations[t['label']] = t['translated_label']
        for c in contenus:
            for t in c['tags']:
                tag_translations[t['label']] = t['translated_label']
        output = {'count': cont_count, 'contents': contenus, 'tags':tags, 'sparkline':yearchange, 'countries':countries, 'disciplines':disciplines, 'tagtranslations': tag_translations}
        outputstr = json.dumps(output)
        cache.set(cache_key, outputstr)
    return HttpResponse(content=outputstr, mimetype='application/json')