--- a/src/p4l/management/commands/import_record.py Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/management/commands/import_record.py Sat Sep 21 23:49:04 2013 +0200
@@ -11,6 +11,7 @@
from p4l.mapping.parsers import RecordParser, QueryCache
from p4l.utils import show_progress
import xml.etree.cElementTree as ET
+from django.conf import settings
logger = logging.getLogger(__name__)
@@ -43,6 +44,12 @@
default=False,
help= 'preserve existing record'
),
+ make_option('-i', '--index',
+ dest= 'index',
+ action='store_true',
+ default=False,
+ help= 'index while importing'
+ ),
)
def __init__(self, *args, **kwargs):
@@ -116,6 +123,14 @@
self.batch_size = options.get('batch_size', 50)
self.preserve = options.get("preserve", False)
+ self.index = options.get("index", False)
+
+ if not self.index:
+ old_haystack_signal_processor = getattr(settings, "HAYSTACK_SIGNAL_PROCESSOR", None)
+ # Changing settings at runtime is discouraged by the Django documentation, but it seems to work within a management command.
+ if old_haystack_signal_processor:
+ settings.HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor'
+
transaction.enter_transaction_management()
transaction.managed(True)
@@ -127,4 +142,7 @@
print("%d error(s) when processing %s, check your log file." % (len(errors), records_url))
transaction.leave_transaction_management()
+
+ if not self.index and old_haystack_signal_processor:
+ settings.HAYSTACK_SIGNAL_PROCESSOR = old_haystack_signal_processor
--- a/src/p4l/models/data.py Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/models/data.py Sat Sep 21 23:49:04 2013 +0200
@@ -177,6 +177,9 @@
def get_authors(self):
return [a.name for a in self.authors.all()]
+    def get_corporate_authors(self):
+        """Return the uri of every corporate author attached to this record."""
+        return [corporate_author.uri for corporate_author in self.corporateAuthors.all()]
def get_imprints_years(self):
return sorted(set([i.imprintDate for i in self.imprints.all() if i.imprintDate]))
--- a/src/p4l/search/index.py Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/search/index.py Sat Sep 21 23:49:04 2013 +0200
@@ -8,25 +8,53 @@
from haystack import indexes
from p4l.models import Record
-from p4l.utils import strip_accents
+from p4l.utils import strip_accents, get_labels_for_uris, safe_cache_key
+
+from django.core.cache import get_cache
+from django.conf import settings
+import logging
+
+logger = logging.getLogger(__name__)
+
+def get_organizations_label(uris):
+    """Return a {uri: label} dict for the given organization URIs.
+    Cached labels are reused; the others are fetched with get_labels_for_uris and then cached.
+    """
+    cache = get_cache('indexation')
+    res, missing_uris = {}, []
+    for uri in uris:
+        label = cache.get(safe_cache_key(uri))
+        if label is not None:
+            res[uri] = label
+        else:
+            missing_uris.append(uri)
+    # Nothing left to resolve (no URIs at all, or all cached): skip the lookup.
+    if not missing_uris:
+        return res
+    for k, v in get_labels_for_uris(missing_uris, settings.RDF_SCHEMES['organization'], None, True).iteritems():
+        cache.set(safe_cache_key(k), v)
+        res[k] = v
+    return res
-class RecordIndex(indexes.SearchIndex, indexes.Indexable):
+class RecordIndex(indexes.SearchIndex, indexes.Indexable):
text = indexes.CharField(document=True, use_template=True, stored=False)
identifier = indexes.CharField(model_attr="identifier", stored=True)
titles = indexes.MultiValueField(model_attr="get_titles", stored=False)
titles_src = indexes.MultiValueField(model_attr="get_titles", stored=True, indexed=False)
- authors = indexes.MultiValueField(model_attr="get_authors", stored=False)
+ authors = indexes.MultiValueField(model_attr="all_authors", stored=False)
years = indexes.MultiValueField(model_attr="get_imprints_years", indexed=False, stored=True)
+    def prepare(self, obj):
+        """Set obj.all_authors (author names plus corporate author labels, accents stripped), then run the base prepare."""
+        names = obj.get_authors() + get_organizations_label(obj.get_corporate_authors()).values()
+        obj.all_authors = [strip_accents(unicode(name)) for name in names]
+        return indexes.SearchIndex.prepare(self, obj)
def prepare_titles(self, obj):
return [strip_accents(v) for v in obj.get_titles()]
- def prepare_authors(self, obj):
- return [strip_accents(v) for v in obj.get_authors()]
-
def get_model(self):
return Record
@@ -34,4 +62,4 @@
return "modification_date"
def index_queryset(self, using=None):
- return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles")
\ No newline at end of file
+ return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles", "corporateAuthors")
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/search/signals.py Sat Sep 21 23:49:04 2013 +0200
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 22, 2013
+
+@author: ymh
+'''
+from django.db import models
+from haystack import signals
+
+
+class P4lSignalProcessor(signals.BaseSignalProcessor):
+    """Signal processor that only listens to saves and deletions of Record."""
+
+    def __bindings(self):
+        # (signal, handler) pairs, in the order they are connected/disconnected.
+        return ((models.signals.post_save, self.handle_save),
+                (models.signals.post_delete, self.handle_delete))
+
+    def __connect_signals(self, klass):
+        for signal, handler in self.__bindings():
+            signal.connect(handler, sender=klass)
+
+    def __disconnect_signals(self, klass):
+        for signal, handler in self.__bindings():
+            signal.disconnect(handler, sender=klass)
+
+    def setup(self):
+        from p4l.models.data import Record  # local import avoids a circular import
+        self.__connect_signals(Record)
+
+    def teardown(self):
+        from p4l.models.data import Record  # local import avoids a circular import
+        self.__disconnect_signals(Record)
\ No newline at end of file
--- a/src/p4l/settings.py Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/settings.py Sat Sep 21 23:49:04 2013 +0200
@@ -174,6 +174,8 @@
}
}
+NB_RECORDS_BY_PAGE = 20
+
HAYSTACK_CONNECTIONS = {
'default': {
'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
@@ -182,9 +184,21 @@
},
}
+HAYSTACK_SIGNAL_PROCESSOR = 'p4l.search.signals.P4lSignalProcessor'
+HAYSTACK_SEARCH_RESULTS_PER_PAGE = NB_RECORDS_BY_PAGE
+
+CACHES = {
+    'default' : {
+        'BACKEND': 'django.core.cache.backends.dummy.DummyCache',  # dummy backend for the default cache
+    },
+    'indexation': {  # looked up by name in p4l.search.index.get_organizations_label
+        'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
+        'LOCATION': 'p4l-indexation',
+        'TIMEOUT': 300,  # default entry lifetime, in seconds
+    }
+}
-NB_RECORDS_BY_PAGE = 20
SPARQL_QUERY_ENDPOINT = "http://localhost:8080/openrdf-sesame/repositories/plan4learning"
SPARQL_SUBJECT_QUERIES = {
"filter" : """
@@ -524,6 +538,17 @@
"child-count" : ""
}
+RDF_SCHEMES = {  # scheme URI for each kind of reference
+    'organization': 'http://www.iiep.unesco.org/plan4learning/scheme/Organizations',  # used by p4l.search.index
+    'audience': '',  # NOTE(review): empty URI -- confirm whether this is intentional
+    'language': 'http://www.iiep.unesco.org/plan4learning/scheme/Languages',
+    'type': 'http://www.iiep.unesco.org/plan4learning/scheme/DocumentType',
+    'subject': 'http://skos.um.es/unescothes/CS000',
+    'theme': 'http://www.iiep.unesco.org/plan4learning/scheme/Themes',
+    'country': 'http://skos.um.es/unescothes/CS000/Countries',
+    'project': 'http://www.iiep.unesco.org/plan4learning/scheme/Projects'
+}
+
REST_FRAMEWORK = {
# Use hyperlinked styles by default.
# Only used if the `serializer_class` attribute is not set on a view.
--- a/src/p4l/templates/search/indexes/p4l/record_text.txt Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/templates/search/indexes/p4l/record_text.txt Sat Sep 21 23:49:04 2013 +0200
@@ -1,4 +1,4 @@
{% load p4lstringfilters %}
{{object.identifier}}
{% for t in object.titles.all %}{{t.title | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %}
-{% for a in object.authors.all %}{{a.name | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %}
\ No newline at end of file
+{% for a in object.all_authors %}{{a}}{% if not forloop.last %} | {% endif %}{% endfor %}
\ No newline at end of file
--- a/src/p4l/utils.py Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/utils.py Sat Sep 21 23:49:04 2013 +0200
@@ -2,11 +2,13 @@
import codecs
import logging
import math
+import hashlib
import sys
import unicodedata
from django.conf import settings
from django.core.validators import URLValidator
+from django.utils.http import urlquote_plus
import requests
@@ -184,4 +186,24 @@
def strip_accents(value):
return ''.join(c for c in unicodedata.normalize('NFD', value)
if unicodedata.category(c) != 'Mn')
+
+
+def safe_cache_key(value):
+    '''Build a cache key from value that avoids invalid memcache characters.
+
+    The value is URL-quoted, every character with a code point below 33 is
+    replaced by an underscore, and settings.CACHE_MIDDLEWARE_KEY_PREFIX plus
+    "_" is prepended. A key longer than 250 characters is returned as its md5 hexdigest.
+    '''
+    key = urlquote_plus(value)
+    # Code points 0..32 are the control characters and the space.
+    for code in range(33):
+        key = key.replace(chr(code), '_')
+    key = "%s_%s" % (settings.CACHE_MIDDLEWARE_KEY_PREFIX, key)
+    # Over-long keys are replaced by a fixed-size digest.
+    if len(key) > 250:
+        return hashlib.md5(key).hexdigest()
+    return key
+
+
\ No newline at end of file