src/p4l/search/index.py
author ymh <ymh.work@gmail.com>
Fri, 11 Oct 2013 15:01:52 +0200
changeset 152 ab9832ca5ca6
parent 131 f1854630734f
permissions -rw-r--r--
Added tag V00.04 for changeset 8d74009f8f46
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
113
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
126
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     2
#
131
f1854630734f change copyright notice
ymh <ymh.work@gmail.com>
parents: 126
diff changeset
     3
# Copyright IRI (c) 2013
126
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     4
#
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     5
# contact@iri.centrepompidou.fr
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     6
#
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     7
# This software is governed by the CeCILL-B license under French law and
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     8
# abiding by the rules of distribution of free software.  You can  use, 
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
     9
# modify and/ or redistribute the software under the terms of the CeCILL-B
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    10
# license as circulated by CEA, CNRS and INRIA at the following URL
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    11
# "http://www.cecill.info". 
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    12
#
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    13
# As a counterpart to the access to the source code and  rights to copy,
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    14
# modify and redistribute granted by the license, users are provided only
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    15
# with a limited warranty  and the software's author,  the holder of the
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    16
# economic rights,  and the successive licensors  have only  limited
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    17
# liability. 
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    18
#
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    19
# In this respect, the user's attention is drawn to the risks associated
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    20
# with loading,  using,  modifying and/or developing or reproducing the
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    21
# software by the user in light of its specific status of free software,
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    22
# that may mean  that it is complicated to manipulate,  and  that  also
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    23
# therefore means  that it is reserved for developers  and  experienced
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    24
# professionals having in-depth computer knowledge. Users are therefore
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    25
# encouraged to load and test the software's suitability as regards their
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    26
# requirements in conditions enabling the security of their systems and/or 
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    27
# data to be ensured and,  more generally, to use and operate it in the 
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    28
# same conditions as regards security. 
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    29
#
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    30
# The fact that you are presently reading this means that you have had
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    31
# knowledge of the CeCILL-B license and that you accept its terms.
a345f1a67bf1 Python licence headers
ymh <ymh.work@gmail.com>
parents: 117
diff changeset
    32
#
113
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
from haystack import indexes
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
from p4l.models import Record
114
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    38
from p4l.utils import strip_accents, get_labels_for_uris, safe_cache_key
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    39
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    40
from django.core.cache import get_cache
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    41
from django.conf import settings
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    42
import logging
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    43
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    44
logger = logging.getLogger(__name__)
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    45
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    46
def get_organizations_label(uris):
    """Return a dict mapping organization URIs to their labels.

    Each URI is first looked up in the ``indexation`` cache (under the key
    produced by ``safe_cache_key``). URIs without a cached label are then
    resolved with a single ``get_labels_for_uris`` call against
    ``settings.RDF_SCHEMES['organizations']``; every label obtained that way
    is written back to the cache before being returned.

    :param uris: iterable of organization URIs.
    :returns: dict ``{uri: label}`` holding the cached labels plus the
        freshly resolved ones. A URI for which neither source yields a
        label is absent from the result.
    """
    cache = get_cache('indexation')

    res = {}
    missing_uris = []

    for uri in uris:
        label = cache.get(safe_cache_key(uri))
        if label is not None:
            res[uri] = label
        else:
            missing_uris.append(uri)

    # Everything was served from the cache (or there was nothing to look up):
    # skip the label resolution entirely instead of calling it with an empty
    # list for every indexed record.
    # NOTE(review): assumes get_labels_for_uris([]) would only have returned
    # an empty mapping -- confirm against p4l.utils.
    if not missing_uris:
        return res

    new_labels = get_labels_for_uris(missing_uris, settings.RDF_SCHEMES['organizations'], None, True)
    # items() rather than iteritems(): same result here, and available on
    # both Python 2 and Python 3.
    for k, v in new_labels.items():
        cache.set(safe_cache_key(k), v)
        res[k] = v

    return res
113
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
c05567404888 First version of indexation. Replace the list view by a search view
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
114
93b45b4f423c add corporate authors and small adjustments
ymh <ymh.work@gmail.com>
parents: 113
diff changeset
    67
class RecordIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index definition for ``Record`` objects."""

    # Document field, built from a template; indexed but not stored.
    text = indexes.CharField(document=True, use_template=True, stored=False)
    # Record identifier, stored so it can be read back from search results.
    identifier = indexes.CharField(model_attr="identifier", stored=True)
    # Titles as indexed (see prepare_titles); not stored.
    titles = indexes.MultiValueField(model_attr="get_titles", stored=False)
    # Titles as returned by the model; stored only, not indexed.
    titles_src = indexes.MultiValueField(model_attr="get_titles", stored=True, indexed=False)
    # Reads the ``all_authors`` attribute that prepare() sets on the object.
    authors = indexes.MultiValueField(model_attr="all_authors", stored=False)

    # Imprint years; stored only, not indexed.
    years = indexes.MultiValueField(model_attr="get_imprints_years", indexed=False, stored=True)

    def prepare(self, obj):
        """Set ``obj.all_authors`` and then run the standard preparation.

        ``all_authors`` is the list of the record's authors followed by the
        labels of its corporate authors, each converted to unicode and passed
        through ``strip_accents``.
        """
        personal_authors = obj.get_authors()
        corporate_labels = get_organizations_label(obj.get_corporate_authors())
        every_author = personal_authors + corporate_labels.values()
        obj.all_authors = [strip_accents(unicode(name)) for name in every_author]
        return indexes.SearchIndex.prepare(self, obj)

    def prepare_titles(self, obj):
        """Return the record's titles, each passed through ``strip_accents``."""
        return [strip_accents(title) for title in obj.get_titles()]

    def get_model(self):
        """Return the model class handled by this index."""
        return Record

    def get_updated_field(self):
        """Return the name of the model field holding the modification date."""
        return "modification_date"

    def index_queryset(self, using=None):
        """Return all records from the ``using`` database, with the related
        imprints, authors, titles and corporate authors prefetched.
        """
        all_records = Record.objects.using(using).all()
        return all_records.prefetch_related("imprints", "authors", "titles", "corporateAuthors")