add corporate authors and small adjustments
author ymh <ymh.work@gmail.com>
Sat, 21 Sep 2013 23:49:04 +0200
changeset 114 93b45b4f423c
parent 113 c05567404888
child 115 4749704f9b40
add corporate authors and small adjustments
src/p4l/management/commands/import_record.py
src/p4l/models/data.py
src/p4l/search/index.py
src/p4l/search/signals.py
src/p4l/settings.py
src/p4l/templates/search/indexes/p4l/record_text.txt
src/p4l/utils.py
--- a/src/p4l/management/commands/import_record.py	Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/management/commands/import_record.py	Sat Sep 21 23:49:04 2013 +0200
@@ -11,6 +11,7 @@
 from p4l.mapping.parsers import RecordParser, QueryCache
 from p4l.utils import show_progress
 import xml.etree.cElementTree as ET
+from django.conf import settings
 
 
 logger = logging.getLogger(__name__)
@@ -43,6 +44,12 @@
             default=False,
             help= 'preserve existing record' 
         ),
+        make_option('-i', '--index',
+            dest= 'index',
+            action='store_true',
+            default=False,
+            help= 'index while importing' 
+        ),
     )
 
     def __init__(self, *args, **kwargs):
@@ -116,6 +123,14 @@
 
         self.batch_size = options.get('batch_size', 50)
         self.preserve = options.get("preserve", False)
+        self.index = options.get("index", False)
+        
+        if not self.index:
+            old_haystack_signal_processor = getattr(settings, "HAYSTACK_SIGNAL_PROCESSOR", None)
+            #this is not recommended by the django manual, but in case of management command it seems to work
+            if old_haystack_signal_processor:
+                settings.HAYSTACK_SIGNAL_PROCESSOR = 'haystack.signals.BaseSignalProcessor' 
+        
         transaction.enter_transaction_management()
         transaction.managed(True)
 
@@ -127,4 +142,7 @@
                 print("%d error(s) when processing %s, check your log file." % (len(errors), records_url))
 
         transaction.leave_transaction_management()
+        
+        if not self.index and old_haystack_signal_processor:
+            settings.HAYSTACK_SIGNAL_PROCESSOR = old_haystack_signal_processor
 
--- a/src/p4l/models/data.py	Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/models/data.py	Sat Sep 21 23:49:04 2013 +0200
@@ -177,6 +177,9 @@
     def get_authors(self):
         return [a.name for a in self.authors.all()]
     
+    def get_corporate_authors(self):
+        return [c.uri for c in self.corporateAuthors.all()]
+    
     def get_imprints_years(self):
         return sorted(set([i.imprintDate for i in self.imprints.all() if i.imprintDate]))
 
--- a/src/p4l/search/index.py	Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/search/index.py	Sat Sep 21 23:49:04 2013 +0200
@@ -8,25 +8,53 @@
 from haystack import indexes
 
 from p4l.models import Record
-from p4l.utils import strip_accents
+from p4l.utils import strip_accents, get_labels_for_uris, safe_cache_key
+
+from django.core.cache import get_cache
+from django.conf import settings
+import logging
+
+logger = logging.getLogger(__name__)
+
+def get_organizations_label(uris):
+    cache = get_cache('indexation')
+    
+    res = {}
+    missing_uris = []
+    
+    for uri in uris: 
+        label = cache.get(safe_cache_key(uri))
+        if label is not None:
+            res[uri] = label
+        else:
+            missing_uris.append(uri)
+            
+    new_labels = get_labels_for_uris(missing_uris, settings.RDF_SCHEMES['organization'], None, True)
+    for k,v in new_labels.iteritems():
+        cache.set(safe_cache_key(k),v)
+        res[k] = v
+    
+    return res
 
 
-class RecordIndex(indexes.SearchIndex, indexes.Indexable):
+class RecordIndex(indexes.SearchIndex, indexes.Indexable):    
 
     text = indexes.CharField(document=True, use_template=True, stored=False)
     identifier = indexes.CharField(model_attr="identifier", stored=True)
     titles = indexes.MultiValueField(model_attr="get_titles", stored=False)
     titles_src = indexes.MultiValueField(model_attr="get_titles", stored=True, indexed=False)
-    authors = indexes.MultiValueField(model_attr="get_authors", stored=False)
+    authors = indexes.MultiValueField(model_attr="all_authors", stored=False)
     
     years = indexes.MultiValueField(model_attr="get_imprints_years", indexed=False, stored=True)
     
+    def prepare(self, obj):
+        authors = obj.get_authors() + get_organizations_label(obj.get_corporate_authors()).values()
+        obj.all_authors = [strip_accents(unicode(v)) for v in authors] 
+        return indexes.SearchIndex.prepare(self, obj)
+    
     def prepare_titles(self, obj):
         return [strip_accents(v) for v in obj.get_titles()]
 
-    def prepare_authors(self, obj):
-        return [strip_accents(v) for v in obj.get_authors()]
-
     def get_model(self):
         return Record
 
@@ -34,4 +62,4 @@
         return "modification_date"
     
     def index_queryset(self, using=None):
-        return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles")
\ No newline at end of file
+        return Record.objects.using(using).all().prefetch_related("imprints","authors", "titles", "corporateAuthors")
\ No newline at end of file
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/p4l/search/signals.py	Sat Sep 21 23:49:04 2013 +0200
@@ -0,0 +1,33 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Feb 22, 2013
+
+@author: ymh
+'''
+from django.db import models
+from haystack import signals
+
+
+class P4lSignalProcessor(signals.BaseSignalProcessor):
+    
+    def __connect_signals(self, klass):
+        models.signals.post_save.connect(self.handle_save, sender=klass)
+        models.signals.post_delete.connect(self.handle_delete, sender=klass)        
+
+    def __disconnect_signals(self, klass):
+        models.signals.post_save.disconnect(self.handle_save, sender=klass)
+        models.signals.post_delete.disconnect(self.handle_delete, sender=klass)
+
+    
+    def setup(self):
+        #put import here to avoid circular         
+        from p4l.models.data import Record
+        self.__connect_signals(Record)
+
+
+
+    def teardown(self):
+
+        from p4l.models.data import Record
+        self.__disconnect_signals(Record)
+        
\ No newline at end of file
--- a/src/p4l/settings.py	Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/settings.py	Sat Sep 21 23:49:04 2013 +0200
@@ -174,6 +174,8 @@
     }
 }
 
+NB_RECORDS_BY_PAGE = 20
+
 HAYSTACK_CONNECTIONS = {
     'default': {
         'ENGINE': 'haystack.backends.elasticsearch_backend.ElasticsearchSearchEngine',
@@ -182,9 +184,21 @@
     },
 }
 
+HAYSTACK_SIGNAL_PROCESSOR = 'p4l.search.signals.P4lSignalProcessor'
+HAYSTACK_SEARCH_RESULTS_PER_PAGE = NB_RECORDS_BY_PAGE
+
+CACHES = {
+    'default' : {
+        'BACKEND': 'django.core.cache.backends.dummy.DummyCache',
+    },
+    'indexation': {
+        'BACKEND': 'django.core.cache.backends.locmem.LocMemCache',
+        'LOCATION': 'p4l-indexation',
+        'TIMEOUT': 300,
+    }
+}
 
 
-NB_RECORDS_BY_PAGE = 20
 SPARQL_QUERY_ENDPOINT = "http://localhost:8080/openrdf-sesame/repositories/plan4learning"
 SPARQL_SUBJECT_QUERIES = {
 "filter" : """
@@ -524,6 +538,17 @@
 "child-count" : ""
 }
 
+RDF_SCHEMES = {
+    'organization': 'http://www.iiep.unesco.org/plan4learning/scheme/Organizations',
+    'audience': '',
+    'language': 'http://www.iiep.unesco.org/plan4learning/scheme/Languages',
+    'type': 'http://www.iiep.unesco.org/plan4learning/scheme/DocumentType',
+    'subject': 'http://skos.um.es/unescothes/CS000',
+    'theme': 'http://www.iiep.unesco.org/plan4learning/scheme/Themes',
+    'country': 'http://skos.um.es/unescothes/CS000/Countries',    
+    'project': 'http://www.iiep.unesco.org/plan4learning/scheme/Projects'
+}
+
 REST_FRAMEWORK = {
     # Use hyperlinked styles by default.
     # Only used if the `serializer_class` attribute is not set on a view.
--- a/src/p4l/templates/search/indexes/p4l/record_text.txt	Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/templates/search/indexes/p4l/record_text.txt	Sat Sep 21 23:49:04 2013 +0200
@@ -1,4 +1,4 @@
 {% load p4lstringfilters %}
 {{object.identifier}}
 {% for t in object.titles.all %}{{t.title | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %}
-{% for a in object.authors.all %}{{a.name | strip_accents }}{% if not forloop.last %} | {% endif %}{% endfor %}
\ No newline at end of file
+{% for a in object.all_authors %}{{a}}{% if not forloop.last %} | {% endif %}{% endfor %}
\ No newline at end of file
--- a/src/p4l/utils.py	Fri Sep 20 22:21:48 2013 +0200
+++ b/src/p4l/utils.py	Sat Sep 21 23:49:04 2013 +0200
@@ -2,11 +2,13 @@
 import codecs
 import logging
 import math
+import hashlib
 import sys
 import unicodedata
 
 from django.conf import settings
 from django.core.validators import URLValidator
+from django.utils.http import urlquote_plus
 import requests
 
 
@@ -184,4 +186,24 @@
 def strip_accents(value):
     return ''.join(c for c in unicodedata.normalize('NFD', value)
                   if unicodedata.category(c) != 'Mn')
+    
+    
+def safe_cache_key(value):
+    '''Returns an md5 hexdigest of value if len(value) > 250. Replaces invalid memcache
+       control characters with an underscore. Also adds the CACHE_MIDDLEWARE_KEY_PREFIX
+       to your keys automatically.
+    '''
+    value = urlquote_plus(value)
+    for char in value:
+        if ord(char) < 33:
+            value = value.replace(char, '_')
+    
+    value = "%s_%s" % (settings.CACHE_MIDDLEWARE_KEY_PREFIX, value)
+    
+    if len(value) <= 250:
+        return value
+    
+    return hashlib.md5(value).hexdigest()
+
+
     
\ No newline at end of file