# HG changeset patch # User ymh # Date 1379800144 -7200 # Node ID 93b45b4f423cbeb45991f7c95fcdde8e231e26fa # Parent c055674048885980842c00bd2ac534ecfdffcb93 add corporate authors and small adjustments diff -r c05567404888 -r 93b45b4f423c src/p4l/management/commands/import_record.py --- a/src/p4l/management/commands/import_record.py Fri Sep 20 22:21:48 2013 +0200 +++ b/src/p4l/management/commands/import_record.py Sat Sep 21 23:49:04 2013 +0200 @@ -11,6 +11,7 @@ from p4l.mapping.parsers import RecordParser, QueryCache from p4l.utils import show_progress import xml.etree.cElementTree as ET +from django.conf import settings logger = logging.getLogger(__name__) @@ -43,6 +44,12 @@ default=False, help= 'preserve existing record' ), + make_option('-i', '--index', + dest= 'index', + action='store_true', + default=False, + help= 'index while importing' + ), ) def __init__(self, *args, **kwargs): @@ -116,6 +123,14 @@ self.batch_size = options.get('batch_size', 50) self.preserve = options.get("preserve", False) + self.index = options.get("index", False) + + if not self.index: + old_haystack_signal_processor = getattr(settings, "HAYSTACK_SIGNAL_PROCESSOR", None) + #this is not recommended by the django manual, but in case of management command it seems to work + if old_haystack_signal_processor: + settings.HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' + transaction.enter_transaction_management() transaction.managed(True) @@ -127,4 +142,7 @@ print("%d error(s) when processing %s, check your log file." % (len(errors), records_url)) transaction.leave_transaction_management() + + if not self.index and old_haystack_signal_processor: + settings.HAYSTACK_SIGNAL_PROCESSOR = old_haystack_signal_processor diff -r c05567404888 -r 93b45b4f423c src/p4l/models/data.py --- a/src/p4l/models/data.py Fri Sep 20 22:21:48 2013 +0200 +++ b/src/p4l/models/data.py Sat Sep 21 23:49:04 2013 +0200 @@ -177,6 +177,9 @@ def get_authors(self): return [a.name for a in self.authors.all()] + def get_corporate_authors(self): + return [c.uri for c in self.corporateAuthors.all()] + def get_imprints_years(self): return sorted(set([i.imprintDate for i in self.imprints.all() if i.imprintDate])) diff -r c05567404888 -r 93b45b4f423c src/p4l/search/index.py --- a/src/p4l/search/index.py Fri Sep 20 22:21:48 2013 +0200 +++ b/src/p4l/search/index.py Sat Sep 21 23:49:04 2013 +0200 @@ -8,25 +8,53 @@ from haystack import indexes from p4l.models import Record -from p4l.utils import strip_accents +from p4l.utils import strip_accents, get_labels_for_uris, safe_cache_key + +from django.core.cache import get_cache +from django.conf import settings +import logging + +logger = logging.getLogger(__name__) + +def get_organizations_label(uris): + cache = get_cache('indexation') + + res = {} + missing_uris = [] + + for uri in uris: + label = cache.get(safe_cache_key(uri)) + if label is not None: + res[uri] = label + else: + missing_uris.append(uri) + + new_labels = get_labels_for_uris(missing_uris, settings.RDF_SCHEMES['organization'], None, True) + for k,v in new_labels.iteritems(): + cache.set(safe_cache_key(k),v) + res[k] = v + + return res -class RecordIndex(indexes.SearchIndex, indexes.Indexable): +class RecordIndex(indexes.SearchIndex, indexes.Indexable): text = indexes.CharField(document=True, use_template=True, stored=False) identifier = indexes.CharField(model_attr="identifier", stored=True) titles = indexes.MultiValueField(model_attr="get_titles", stored=False) titles_src = indexes.MultiValueField(model_attr="get_titles", stored=True, indexed=False) - authors = indexes.MultiValueField(model_attr="get_authors", stored=False) + authors = indexes.MultiValueField(model_attr="all_authors", stored=False) years = indexes.MultiValueField(model_attr="get_imprints_years", indexed=False, stored=True) + def prepare(self, obj): + authors = obj.get_authors() + get_organizations_label(obj.get_corporate_authors()).values() + obj.all_authors = [strip_accents(unicode(v)) for v in authors] + return indexes.SearchIndex.prepare(self, obj) + def prepare_titles(self, obj): return [strip_accents(v) for v in obj.get_titles()] - def prepare_authors(self, obj): - return [strip_accents(v) for v in obj.get_authors()] - def get_model(self): return Record @@ -34,4 +62,4 @@ return "modification_date" def index_queryset(self, using=None): - return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles") \ No newline at end of file + return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles", "corporateAuthors") \ No newline at end of file diff -r c05567404888 -r 93b45b4f423c src/p4l/search/signals.py --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/src/p4l/search/signals.py Sat Sep 21 23:49:04 2013 +0200 @@ -0,0 +1,33 @@ +# -*- coding: utf-8 -*- +''' +Created on Feb 22, 2013 + +@author: ymh +''' +from django.db import models +from haystack import signals + + +class P4lSignalProcessor(signals.BaseSignalProcessor): + + def __connect_signals(self, klass): + models.signals.post_save.connect(self.handle_save, sender=klass) + models.signals.post_delete.connect(self.handle_delete, sender=klass) + + def __disconnect_signals(self, klass): + models.signals.post_save.disconnect(self.handle_save, sender=klass) + models.signals.post_delete.disconnect(self.handle_delete, sender=klass) + + + def setup(self): + #put import here to avoid circular + from p4l.models.data import Record + self.__connect_signals(Record) + + + + def teardown(self): + + from p4l.models.data import Record + self.__disconnect_signals(Record) + \ No newline at end of file diff -r c05567404888 -r 93b45b4f423c src/p4l/settings.py --- a/src/p4l/settings.py Fri Sep 20 22:21:48 2013 +0200 +++ b/src/p4l/settings.py Sat Sep 21 23:49:04 2013 +0200 @@ -174,6 +174,8 @@ } } +NB_RECORDS_BY_PAGE = 20 + HAYSTACK_CONNECTIONS = { 'default': { 'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine', @@ -182,9 +184,21 @@ }, } +HAYSTACK_SIGNAL_PROCESSOR = 'p4l.search.signals.P4lSignalProcessor' +HAYSTACK_SEARCH_RESULTS_PER_PAGE = NB_RECORDS_BY_PAGE + +CACHES = { + 'default' : { + 'BACKEND': 'django.core.cache.backends.dummy.DummyCache', + }, + 'indexation': { + 'BACKEND': 'django.core.cache.backends.locmem.LocMemCache', + 'LOCATION': 'p4l-indexation', + 'TIMEOUT': 300, + } +} -NB_RECORDS_BY_PAGE = 20 SPARQL_QUERY_ENDPOINT = "http://localhost:8080/openrdf-sesame/repositories/plan4learning" SPARQL_SUBJECT_QUERIES = { "filter" : """ @@ -524,6 +538,17 @@ "child-count" : "" } +RDF_SCHEMES = { + 'organization': 'http://www.iiep.unesco.org/plan4learning/scheme/Organizations', + 'audience': '', + 'language': 'http://www.iiep.unesco.org/plan4learning/scheme/Languages', + 'type': 'http://www.iiep.unesco.org/plan4learning/scheme/DocumentType', + 'subject': 'http://skos.um.es/unescothes/CS000', + 'theme': 'http://www.iiep.unesco.org/plan4learning/scheme/Themes', + 'country': 'http://skos.um.es/unescothes/CS000/Countries', + 'project': 'http://www.iiep.unesco.org/plan4learning/scheme/Projects' +} + REST_FRAMEWORK = { # Use hyperlinked styles by default. # Only used if the `serializer_class` attribute is not set on a view. diff -r c05567404888 -r 93b45b4f423c src/p4l/templates/search/indexes/p4l/record_text.txt --- a/src/p4l/templates/search/indexes/p4l/record_text.txt Fri Sep 20 22:21:48 2013 +0200 +++ b/src/p4l/templates/search/indexes/p4l/record_text.txt Sat Sep 21 23:49:04 2013 +0200 @@ -1,4 +1,4 @@ {% load p4lstringfilters %} {{object.identifier}} {% for t in object.titles.all %}{{t.title | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %} -{% for a in object.authors.all %}{{a.name | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %} \ No newline at end of file +{% for a in object.all_authors %}{{a}}{% if not forloop.last %} | {% endif %}{% endfor %} \ No newline at end of file diff -r c05567404888 -r 93b45b4f423c src/p4l/utils.py --- a/src/p4l/utils.py Fri Sep 20 22:21:48 2013 +0200 +++ b/src/p4l/utils.py Sat Sep 21 23:49:04 2013 +0200 @@ -2,11 +2,13 @@ import codecs import logging import math +import hashlib import sys import unicodedata from django.conf import settings from django.core.validators import URLValidator +from django.utils.http import urlquote_plus import requests @@ -184,4 +186,24 @@ def strip_accents(value): return ''.join(c for c in unicodedata.normalize('NFD', value) if unicodedata.category(c) != 'Mn') + + +def safe_cache_key(value): + '''Returns an md5 hexdigest of value if len(value) > 250. Replaces invalid memcache + control characters with an underscore. Also adds the CACHE_MIDDLEWARE_KEY_PREFIX + to your keys automatically. + ''' + value = urlquote_plus(value) + for char in value: + if ord(char) < 33: + value = value.replace(char, '_') + + value = "%s_%s" % (settings.CACHE_MIDDLEWARE_KEY_PREFIX, value) + + if len(value) <= 250: + return value + + return hashlib.md5(value).hexdigest() + + \ No newline at end of file