src/jocondelab/search_indexes.py
author ymh <ymh.work@gmail.com>
Fri, 07 Feb 2014 15:24:43 +0100
changeset 352 4e940d53eacd
parent 336 6ffca49f6a0c
permissions -rw-r--r--
upgrade to django 1.6.2
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
336
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
# Copyright Institut de Recherche et d'Innovation © 2014
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
# contact@iri.centrepompidou.fr
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
# Ce code a été développé pour un premier usage dans JocondeLab, projet du 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
# ministère de la culture et de la communication visant à expérimenter la
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
# recherche sémantique dans la base Joconde
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
# (http://jocondelab.iri-research.org/).
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
# Ce logiciel est régi par la licence CeCILL-C soumise au droit français et
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
# respectant les principes de diffusion des logiciels libres. Vous pouvez
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
# utiliser, modifier et/ou redistribuer ce programme sous les conditions
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
# de la licence CeCILL-C telle que diffusée par le CEA, le CNRS et l'INRIA 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
# sur le site "http://www.cecill.info".
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
# En contrepartie de l'accessibilité au code source et des droits de copie,
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
# de modification et de redistribution accordés par cette licence, il n'est
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
# offert aux utilisateurs qu'une garantie limitée.  Pour les mêmes raisons,
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
# seule une responsabilité restreinte pèse sur l'auteur du programme,  le
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
# titulaire des droits patrimoniaux et les concédants successifs.
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
# A cet égard  l'attention de l'utilisateur est attirée sur les risques
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
# associés au chargement,  à l'utilisation,  à la modification et/ou au
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
# développement et à la reproduction du logiciel par l'utilisateur étant 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
# donné sa spécificité de logiciel libre, qui peut le rendre complexe à 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
# manipuler et qui le réserve donc à des développeurs et des professionnels
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
# avertis possédant  des  connaissances  informatiques approfondies.  Les
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
# utilisateurs sont donc invités à charger  et  tester  l'adéquation  du
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
# logiciel à leurs besoins dans des conditions permettant d'assurer la
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
# sécurité de leurs systèmes et ou de leurs données et, plus généralement, 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
# à l'utiliser et l'exploiter dans les mêmes conditions de sécurité. 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
# Le fait que vous puissiez accéder à cet en-tête signifie que vous avez 
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
# pris connaissance de la licence CeCILL-C, et que vous en avez accepté les
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
# termes.
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
#
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
from unidecode import unidecode
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
'''
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
@author: ymh
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
'''
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
from haystack import indexes
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
from jocondelab.models import DbpediaFields
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
6ffca49f6a0c use elasticsearch for label and free search
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
class DbpediaFieldsIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index describing how ``DbpediaFields`` rows are indexed.

    Most fields are fed from the model's ``label`` attribute, each declared
    with a different index field type so the same label can be queried in
    several ways.
    """

    # Main document field; its content is rendered from a template.
    text = indexes.CharField(document=True, use_template=True)
    # The label as stored on the model.
    label = indexes.CharField(model_attr='label', null=True)
    # Indexed-only (not stored) variant of the label; its value is produced
    # by ``prepare_label_trans`` below.
    label_trans = indexes.CharField(model_attr='label', null=True, indexed=True, stored=False)
    # N-gram and edge n-gram variants of the label.
    label_ngram = indexes.NgramField(model_attr='label', null=True)
    label_edge = indexes.EdgeNgramField(model_attr='label', null=True)
    language_code = indexes.CharField(model_attr='language_code', null=True)
    # Stored but not indexed: returned with results, not searchable itself.
    dbpedia_uri = indexes.CharField(model_attr='dbpedia_uri', null=True, indexed=False, stored=True)

    def get_model(self):
        """Return the model class this index is built from."""
        return DbpediaFields

    def index_queryset(self, using=None):
        """Return the queryset used when the whole index is rebuilt.

        Only records whose related ``term`` has ``nb_illustrated_notice``
        greater than zero are included.
        """
        model = self.get_model()
        return model.objects.filter(term__nb_illustrated_notice__gt=0)

    def prepare_label_trans(self, obj):
        """Return the lowercased label passed through ``unidecode``.

        Returns ``None`` when the object has no label.
        """
        label = obj.label
        if label is None:
            return None
        return unidecode(label.lower())