src/hdalab/views/ajax.py
changeset 271 8f77cf71ab02
parent 260 a15e8cb98525
child 272 1c774f7a0341
equal deleted inserted replaced
265:73f19fa4f997 271:8f77cf71ab02
       
     1 # -*- coding: utf-8 -*-
       
     2 '''
       
     3 Created on Jan 31, 2012
       
     4 
       
     5 @author: ymh
       
     6 '''
       
     7 from django.conf import settings
       
     8 from django.core.cache import cache
       
     9 from django.db.models import Q, Count, Min
       
    10 from django.http import HttpResponse
       
    11 from hdabo.models import Tag, Datasheet, TaggedSheet
       
    12 from hdalab.models import HdaSession, Country, TagYears, DatasheetExtras
       
    13 from hdalab.models.dataviz import DbpediaFieldsTranslation, DbpediaFields
       
    14 from hdalab.models.categories import WpCategory, WpCategoryInclusion, TagWpCategory
       
    15 from hdalab.utils import fix_cache_key
       
    16 import copy
       
    17 import django.utils.simplejson as json
       
    18 import hmac
       
    19 import itertools
       
    20 import uuid
       
    21 
       
    22 def tagtranslation(request):
       
    23     
       
    24     lang = request.GET.get('lang',request.LANGUAGE_CODE)
       
    25     labels = request.GET.get('labels',None)
       
    26 
       
    27     if not labels:
       
    28         return HttpResponse(content=json.dumps({}), mimetype='application/json')
       
    29     
       
    30     labelslist = [lbl.strip() for lbl in labels.split(",")]
       
    31     masters = []
       
    32     
       
    33     for lbl in labelslist:
       
    34         labelqs = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = lbl)[0:1]
       
    35         if len(labelqs) > 0:
       
    36             tag = labelqs.get()
       
    37             if tag.dbpedia_fields:
       
    38                 masters.append(tag.dbpedia_fields)
       
    39     
       
    40     translationqs = DbpediaFieldsTranslation.objects.select_related("master", "master__tag").filter(master__in = masters, language_code=lang)
       
    41     
       
    42     translations = dict([(t.master.label, t.label) for t in translationqs])
       
    43     
       
    44     return HttpResponse(content=json.dumps(translations), mimetype='application/json')
       
    45 
       
    46 def subcat(category, globtags, level, max_level ):
       
    47     # recursive function used by cattree
       
    48     catlabel = category.label
       
    49     tags = Tag.objects.filter(wp_categories__wp_category = category).distinct()
       
    50     taglabels = [k for k in dict([(t.label,t.label) for t in tags])]
       
    51     resobj = {
       
    52           'label': category.label,
       
    53           'themes': [],
       
    54           'contents': []
       
    55       }
       
    56     for label in taglabels:
       
    57         if label == catlabel:
       
    58             globtags[label] = {'level': level, 'access': resobj }
       
    59         else:
       
    60             tag_in_list = {'label' : label, 'contents': []}
       
    61             resobj['themes'].append(tag_in_list)
       
    62             globtags[label] = {'level': (level + 1), 'access': tag_in_list }
       
    63             
       
    64     if level < max_level:
       
    65         subcats = WpCategory.objects.filter(parent_categories__parent_category = category)
       
    66         resobj['themes'] += [subcat(subcats[i], globtags, level + 1, max_level ) for i in range(len(subcats))]
       
    67     return resobj
       
    68 
       
    69 def cleantags(category):
       
    70     if category.has_key('contents') and len(category['contents']) == 0:
       
    71         del category['contents']
       
    72     if category.has_key('contents'):
       
    73         category['contents'] = sorted(category['contents'], key=lambda content: -content['score'])
       
    74     if category.has_key('themes'):
       
    75         themes = []
       
    76         for theme in category['themes']:
       
    77             clean_theme = cleantags(theme)
       
    78             if clean_theme.has_key('themes') or clean_theme.has_key('contents'):
       
    79                 themes.append(clean_theme)
       
    80         category['themes'] = sorted(themes, key=lambda cat: cat['label'])
       
    81         if len(category['themes']) == 0:
       
    82             del category['themes']
       
    83     return category
       
    84 
       
    85 def cattree(request):
       
    86     # Gets the category tree from a label
       
    87     ROOT_MAX_TAG_ORDER = 8
       
    88     MAX_TAG_ORDER = 8
       
    89     MAX_LEVEL = 3
       
    90     LEVEL_COEFF = 5
       
    91     label = request.GET.get('label', None)
       
    92     lowerlabel = label.lower()
       
    93     globtags = {}
       
    94     resobj = None
       
    95     master_category = WpCategory.objects.filter(label__iexact=label)[0:1]
       
    96     if len(master_category):
       
    97         resobj = subcat(master_category[0], globtags, 1, MAX_LEVEL )
       
    98     
       
    99 #    tag_list = [k for k in globtags]
       
   100     
       
   101 #    if len(tag_list):
       
   102     contents = []
       
   103 #    datasheets = Datasheet.objects.filter(validated = True, taggedsheet__tag__label__in = tag_list, taggedsheet__order__lte = MAX_TAG_ORDER).distinct()
       
   104     datasheets = Datasheet.objects.filter(validated = True, taggedsheet__tag__label__iexact = label, taggedsheet__order__lte = ROOT_MAX_TAG_ORDER).select_related('organisation').distinct()
       
   105     for datasheet in datasheets:
       
   106         # Calculating where we add the datasheet in the tree
       
   107         maintag = None
       
   108         maintagscore = -5
       
   109         dsscore = 0
       
   110         rootscore = 0
       
   111         for ts in TaggedSheet.objects.select_related('tag','datasheet').filter(datasheet__id=datasheet.id,order__lte=MAX_TAG_ORDER):
       
   112             label = ts.tag.label
       
   113             if globtags.has_key(label):
       
   114                 score = LEVEL_COEFF * globtags[label]['level'] - ts.order
       
   115                 if score > maintagscore:
       
   116                     maintagscore = score
       
   117                     maintag = label
       
   118                     dsscore = (MAX_TAG_ORDER - ts.order)
       
   119                 if label.lower() == lowerlabel:
       
   120                     rootscore = (ROOT_MAX_TAG_ORDER - ts.order)
       
   121         if maintag is not None:
       
   122             globtags[maintag]['access']['contents'].append({
       
   123                 'id': datasheet.id,
       
   124                 'title': datasheet.title,
       
   125                 'url': datasheet.url,
       
   126                 'description': datasheet.description,
       
   127                 'hda_id': datasheet.hda_id,
       
   128                 'organization': datasheet.organisation.name,
       
   129                 'organization_url': datasheet.organisation.website,
       
   130                 'score': max(dsscore, rootscore)
       
   131             })
       
   132     cleantags(resobj)
       
   133                     
       
   134 #        resobj['contents'] = [{'id': d.id, 'title': d.title, 'tags': [t.label for t in d.tags.filter(taggedsheet__order__lte=5)]} for d in datasheets]
       
   135     
       
   136     return HttpResponse(content=json.dumps(resobj), mimetype='application/json')
       
   137 
       
   138 def sessioninfo(request):
       
   139     
       
   140     data = json.loads(request.GET.get('data', "{}"))
       
   141     write = False
       
   142     
       
   143     if 'sessionid' in request.GET:
       
   144         request.session['sessionid'] = request.GET['sessionid']
       
   145     if 'sessionkey' in request.GET:
       
   146         request.session['sessionkey'] = request.GET['sessionkey']
       
   147         
       
   148     if 'sessionid' in request.session:
       
   149         sessionid = request.session['sessionid']
       
   150         
       
   151         if HdaSession.objects.filter(sessionid=sessionid).count() == 1:
       
   152             sessionkey = request.session.get('sessionkey',None)
       
   153             hm = hmac.new(settings.SECRET_KEY, sessionid)
       
   154             if hm.hexdigest() == sessionkey:
       
   155                 write = True            
       
   156         else:
       
   157             del request.session['sessionid']
       
   158         
       
   159     if 'sessionid' not in request.session:
       
   160         sessionid = unicode(uuid.uuid1())
       
   161         HdaSession.objects.create(sessionid=sessionid, data=json.dumps({}))
       
   162         write = True
       
   163         request.session['sessionid'] = sessionid
       
   164         request.session['sessionkey'] = hmac.new(settings.SECRET_KEY, sessionid).hexdigest()
       
   165         
       
   166     if write and data:
       
   167         HdaSession.objects.filter(sessionid=sessionid).update(data=json.dumps(data))
       
   168     else:
       
   169         data = HdaSession.objects.get(sessionid=sessionid).data
       
   170         data = json.loads(data) if data else {}           
       
   171          
       
   172     resobj = {'data': data, "write_allowed" : write, "sessionid": sessionid }
       
   173     if write:
       
   174         resobj['sessionkey'] = request.session['sessionkey']
       
   175         
       
   176     return HttpResponse(content=json.dumps(resobj), mimetype='application/json')
       
   177 
       
   178 
       
   179 def tagsearch(request):
       
   180     
       
   181     q = request.GET.get('term',None)
       
   182     maxcount = int(request.GET.get('count','40'))
       
   183     lang = request.GET.get('lang',request.LANGUAGE_CODE)
       
   184     
       
   185     stemming_langs = [ 'fr', 'en', 'de', 'it' ]
       
   186     # For Japanese, there are no word boundaries, we should not use the regexp in that case
       
   187     no_translate_langs = [ 'fr' ]
       
   188     
       
   189     if q:
       
   190         lq = q.lower()
       
   191         qs = Tag.objects.select_related('dbpedia_fields').filter(datasheet__validated=True)
       
   192         qrx = '(\\m|\\b)%s'%q
       
   193         if lang in no_translate_langs:
       
   194             if lang in stemming_langs:
       
   195                 qs = qs.filter( label__iregex = qrx )
       
   196             else:
       
   197                 qs = qs.filter( label__icontains = q )
       
   198         else:
       
   199             if lang in stemming_langs:
       
   200                 qs = qs.filter(dbpedia_fields__translations__label__iregex=qrx, dbpedia_fields__translations__language_code=lang, dbpedia_fields__translations__is_label_translated = True)
       
   201             else:
       
   202                 qs = qs.filter(dbpedia_fields__translations__label__icontains=q, dbpedia_fields__translations__language_code=lang, dbpedia_fields__translations__is_label_translated = True)
       
   203     else:
       
   204         qs = Tag.objects.filter(~Q(dbpedia_uri = None))
       
   205            
       
   206     qs = qs.annotate(nb=Count('datasheet',distinct=True)).order_by('-nb')[:maxcount]
       
   207     
       
   208     qslist = list(qs)
       
   209     
       
   210     if lang in no_translate_langs:
       
   211         translations = {}
       
   212     else:
       
   213         transqs = DbpediaFieldsTranslation.objects.filter(master__tag__in = qslist, language_code=lang, is_label_translated=True).select_related("master")
       
   214         translations = dict([(tr.master.tag_id, {'label':tr.label,'abstract':tr.abstract, 'is_label_translated': tr.is_label_translated}) for tr in transqs])
       
   215     
       
   216     res = []
       
   217     
       
   218     for t in qslist:
       
   219         dbfields = t.dbpedia_fields
       
   220         resobj = {'original_label':t.label,'nb':t.nb}
       
   221         resobj['thumbnail'] = dbfields.thumbnail if dbfields is not None else None
       
   222         
       
   223 #        if t.id in translations and not translations[t.id].get('is_label_translated', True):
       
   224 #            continue
       
   225         if t.id in translations:
       
   226             resobj['value'] = translations[t.id]['label']
       
   227             resobj['abstract'] = translations[t.id]['abstract']
       
   228         else:
       
   229             resobj['value'] = t.label
       
   230             resobj['abstract'] = dbfields.abstract if dbfields is not None else None
       
   231         if q is None or resobj['value'].lower().find(lq) != -1:
       
   232             res.append(resobj)
       
   233     
       
   234     return HttpResponse(content=json.dumps(res), mimetype='application/json')
       
   235 
       
   236 def catsearch(request):
       
   237     
       
   238     q = request.GET.get('term',None)
       
   239     
       
   240     # On ne récupère que les catégories qui sont également des tags
       
   241     qrx = '(\\m|\\b)%s'%q
       
   242     qs = Tag.objects.filter(label__iregex=q)
       
   243     
       
   244     labels = [tag.label for tag in qs]
       
   245     
       
   246     qs = WpCategory.objects.annotate(nb=Count('child_categories__child_category__tags')).filter(label__in = labels, nb__gt=0)
       
   247     
       
   248     res = [{'value':t.label} for t in qs]
       
   249     
       
   250     return HttpResponse(content=json.dumps(res), mimetype='application/json')
       
   251 
       
   252 def filter(request):
       
   253     
       
   254     lang = request.GET.get('lang',request.LANGUAGE_CODE)
       
   255     periode = request.GET.get('period',None)
       
   256     label = request.GET.get('label', None)
       
   257     country = request.GET.get('country', None)
       
   258     contentlist = request.GET.get('contentlist', None)
       
   259     max_tag_order = int(request.GET.get('mto', '12'))
       
   260     content_count = request.GET.get('contentcount', 12)
       
   261     tag_count = request.GET.get('tagcount', 30)
       
   262     no_translate_langs = [ 'fr' ]
       
   263     
       
   264     key_parts = ("filter",lang,periode,label,country,contentlist,max_tag_order,content_count,tag_count)
       
   265     key_parts = [unicode(p).encode("utf-8") for p in key_parts]
       
   266     
       
   267     cache_key = fix_cache_key("-".join(key_parts))
       
   268     
       
   269     outputstr = cache.get(cache_key)
       
   270     
       
   271     if outputstr is None:
       
   272         
       
   273         matchtagids = []
       
   274         
       
   275         tagqs = Tag.objects.exclude(category__label__in = ['Datation', 'Localisation', 'Discipline artistique']).filter(~Q(dbpedia_uri = None))
       
   276         countryqs = Country.objects
       
   277         discqs = Tag.objects.filter(~Q(dbpedia_uri = None), category__label = u'Discipline artistique').select_related('dbpedia_fields')
       
   278         yearqs = TagYears.objects
       
   279         
       
   280         contentqs = Datasheet.objects.filter(validated=True)
       
   281         labeltranslations = []
       
   282     
       
   283         if label or periode or country or contentlist :
       
   284             matchtagqslist = []
       
   285             
       
   286             if periode:
       
   287                 years = periode.split(",")
       
   288                 start_year = int(years[0])
       
   289                 end_year = int(years[0:2][-1])
       
   290                 delta = max(1, (end_year-start_year)/2)
       
   291                 minstart = start_year - delta
       
   292                 maxend = end_year + delta
       
   293                 matchtagqs = Tag.objects.filter(~Q(dbpedia_uri = None),
       
   294                                                 years__end_year__gte = start_year, 
       
   295                                                 years__start_year__lte = end_year,
       
   296                                                 years__end_year__lte = maxend,
       
   297                                                 years__start_year__gte = minstart,
       
   298                                                 )
       
   299                 matchtagqslist.append(matchtagqs)
       
   300                 
       
   301             if label:
       
   302                 for txtlbl in label.split(","):
       
   303                     matchtagqs = Tag.objects.select_related('dbpedia_fields').filter(~Q(dbpedia_uri = None), label__iexact = txtlbl.strip())
       
   304                     matchtagqslist.append(matchtagqs)
       
   305                 
       
   306             if country:
       
   307                 for country_uri in country.split(","):
       
   308                     matchtagqs = Tag.objects.filter(~Q(dbpedia_uri = None),locatedin__country__dbpedia_uri = country_uri)
       
   309                     matchtagids += [t.id for t in matchtagqs if t.id not in matchtagids]
       
   310                     matchtagqslist.append(matchtagqs)
       
   311             if contentlist:
       
   312                 contentqs = contentqs.filter(id__in = contentlist.split(","))
       
   313 
       
   314             tagcond = None
       
   315             tagcondid = None
       
   316             for matchtagqs in matchtagqslist:
       
   317                 newcond = Q(id__in = TaggedSheet.objects.filter(tag__in = copy.deepcopy(matchtagqs), order__lte = max_tag_order).values('datasheet_id'))
       
   318                 newcondid = Q(id__in = matchtagqs)
       
   319                 tagcond = newcond if tagcond is None else (tagcond & newcond)
       
   320                 tagcondid = newcondid if tagcondid is None else (tagcondid | newcondid)
       
   321             
       
   322             contentqs = contentqs.filter(tagcond).distinct()
       
   323             matchtagidsqs = list(Tag.objects.select_related("dbpedia_fields").only("id").filter(tagcondid))
       
   324             matchtagids = [t.id for t in matchtagidsqs]
       
   325             
       
   326             if lang not in no_translate_langs:
       
   327                 masters = [t.dbpedia_fields for t in matchtagidsqs if t.dbpedia_fields is not None]
       
   328                 
       
   329                 translationqs = DbpediaFieldsTranslation.objects.select_related("master", "master__tag").filter(master__in = masters, language_code=lang)    
       
   330                 labeltranslations = [{'label':t.master.label, 'translated_label':t.label} for t in translationqs]
       
   331             
       
   332             tagqs = tagqs.filter(datasheet__in = contentqs)
       
   333             countryqs = countryqs.filter(includes__tag__taggedsheet__datasheet__in = contentqs)
       
   334             discqs = discqs.filter(datasheet__in = contentqs)
       
   335             yearqs = yearqs.filter(tag__taggedsheet__datasheet__in = contentqs)
       
   336             
       
   337         if contentlist is None:
       
   338             contentqs.order_by('?')
       
   339                         
       
   340         cont_count = contentqs.count()
       
   341         
       
   342         contenus = dict([(content.id, {'score' : 0, 'tags' : [], 'hda_id': content.hda_id, 'id':content.id, 'title': content.title, 'description': content.description, 'url': content.url}) for content in contentqs[0:content_count]])
       
   343         contentids = contenus.keys()
       
   344         
       
   345         qs = DatasheetExtras.objects.select_related('insee').filter(datasheet__in = contentids)
       
   346         for dse in qs:
       
   347             contenus[dse.datasheet_id]['coords'] = {'city_name': dse.insee.city_name, 'latitude': dse.insee.latitude, 'longitude': dse.insee.longitude}
       
   348         
       
   349         qs = list(TaggedSheet.objects.select_related('tag', 'tag__dbpedia_fields').filter(datasheet__in = contentids, order__lte = max_tag_order).order_by('order'))
       
   350         
       
   351         translations = {}
       
   352         
       
   353         if lang not in no_translate_langs:
       
   354             transqs = DbpediaFieldsTranslation.objects.filter(master__in = [ts.tag.dbpedia_fields for ts in qs], language_code = lang)
       
   355             translations = dict([(trans.master_id,trans.label) for trans in transqs])
       
   356         
       
   357         for ts in qs:
       
   358             match_tag = ts.tag.id in matchtagids
       
   359             contenus[ts.datasheet_id]['tags'].append({'id': ts.tag.id, 'label':ts.tag.label, 'order':ts.order, 'match': match_tag , 'translated_label': translations.get(ts.tag.dbpedia_fields.id, ts.tag.label) if ts.tag.dbpedia_fields is not None else ts.tag.label})
       
   360             
       
   361             if match_tag:
       
   362                 contenus[ts.datasheet_id]['score'] += 2*max_tag_order - ts.order
       
   363             
       
   364         if contentlist is None:
       
   365             contenus = sorted(contenus.values(),key=lambda e: -e['score'])
       
   366         else:
       
   367             contenus = contenus.values()
       
   368     
       
   369         #tagqs = tagqs.annotate(nb=Count('datasheet')).order_by('-nb')[:tag_count]
       
   370         tagqs = tagqs.annotate(nb=Count('datasheet')).order_by('-nb').only('id','label')[:tag_count]
       
   371         #.select_related('dbpedia_fields')
       
   372         # hack to add only necessary fields in the group by
       
   373         # contournement bug https://code.djangoproject.com/ticket/17144
       
   374         tagqs.query.clear_select_fields()
       
   375         tagqs.query.add_fields(['id','label'], False)
       
   376         tagqs.query.set_group_by()
       
   377     
       
   378         tagqslist = list(tagqs)
       
   379         
       
   380         dbpediafields = dict([(df.tag_id, df) for df in DbpediaFields.objects.filter(tag__in = tagqslist)])
       
   381 
       
   382         if lang not in no_translate_langs:
       
   383             transqs = DbpediaFieldsTranslation.objects.filter(master__in = dbpediafields.values(), language_code = lang)
       
   384             translations = dict([(trans.master_id,trans.label) for trans in transqs])
       
   385     
       
   386         tags = [{'id': tag.id, 'label': tag.label, 'score': tag.nb, 'translated_label': translations.get(dbpediafields[tag.id].id, tag.label) if tag.id in dbpediafields else tag.label} for tag in tagqslist]
       
   387     
       
   388         countryqs = countryqs.annotate(nb=Count('includes__tag__taggedsheet'))
       
   389         countries = dict([(country.dbpedia_uri, country.nb) for country in countryqs])
       
   390     
       
   391         discqslist = list(discqs.annotate(nb=Count('taggedsheet')).order_by('-nb')[:10])
       
   392 
       
   393         if lang not in no_translate_langs:
       
   394             transqs = DbpediaFieldsTranslation.objects.filter(master__in = [tag.dbpedia_fields for tag in discqslist], language_code = lang)
       
   395             translations = dict([(trans.master_id,trans.label) for trans in transqs])
       
   396         
       
   397         disciplines = [{'label':tag.label,'score':tag.nb, 'translated_label': translations.get(tag.dbpedia_fields.id, tag.label) if tag.dbpedia_fields is not None else tag.label} for tag in discqslist]
       
   398         
       
   399         years = {}
       
   400         yearqs = yearqs.annotate(nb=Count('tag__taggedsheet'))
       
   401         for ty in yearqs:
       
   402             for year in range(ty.start_year, ty.end_year):
       
   403                 years[year] = ty.nb + (years[year] if year in years else 0)
       
   404                 
       
   405         yearchange = []
       
   406         for year in sorted(years.keys()):
       
   407             score = years[year]
       
   408             if year < 2011:
       
   409                 if (year-1 not in years and score != 0) or (year-1 in years and years[year-1] != score):
       
   410                     yearchange.append({'year': year, 'score': score})
       
   411                 if year+1 not in years and year != -1 and score != 0:
       
   412                     yearchange.append({'year': year+1, 'score': 0})
       
   413     
       
   414         tag_translations = {}
       
   415         for t in itertools.chain(labeltranslations,disciplines,tags):
       
   416             tag_translations[t['label']] = t['translated_label']
       
   417         for c in contenus:
       
   418             for t in c['tags']:
       
   419                 tag_translations[t['label']] = t['translated_label']
       
   420         
       
   421         output = {'count': cont_count, 'contents': contenus, 'tags':tags, 'sparkline':yearchange, 'countries':countries, 'disciplines':disciplines, 'tagtranslations': tag_translations}
       
   422         outputstr = json.dumps(output)
       
   423         cache.set(cache_key, outputstr)
       
   424         
       
   425     return HttpResponse(content=outputstr, mimetype='application/json')
       
   426 
       
   427 def subtree(tree):
       
   428     MAX_TAG_ORDER = 16
       
   429     label = tree['label']
       
   430     sub = tree.get('contents',[])
       
   431     
       
   432     datasheets = Datasheet.objects.filter(validated = True, taggedsheet__tag__label__iexact = label, taggedsheet__order__lte = MAX_TAG_ORDER).annotate(tagorder=Min('taggedsheet__order')).select_related('organisation').distinct()
       
   433     
       
   434     contents = [{ 'description': ds.description, 'title': ds.title, 'url': ds.url, 'score': int((MAX_TAG_ORDER - ds.tagorder)/2), 'id': ds.id, 'hda_id': ds.hda_id, 'organization': ds.organisation.name, 'organization_url': ds.organisation.website } for ds in datasheets]
       
   435     
       
   436     contents = sorted(contents, key=lambda e: -e['score'])
       
   437     
       
   438     res = { 'label': label }
       
   439     
       
   440     if len(contents):
       
   441         res['contents'] = contents
       
   442     
       
   443     if len(sub):
       
   444         subcats = [subtree(st) for st in sub]
       
   445         subcats = [sc for sc in subcats if len(sc.get('contents',[])) or len(sc.get('themes',[]))]
       
   446         res['themes'] = subcats
       
   447     
       
   448     return res
       
   449 
       
   450 def filltree(request):
       
   451     
       
   452     tree = request.GET.get('tree','{}')
       
   453     
       
   454     treeobj = json.loads(tree)
       
   455     
       
   456     res = subtree(treeobj)
       
   457         
       
   458     return HttpResponse(content=json.dumps(res), mimetype='application/json')