src/ldt/ldt/indexation/backends/elasticsearch_backend.py
author ymh <ymh.work@gmail.com>
Mon, 20 May 2013 18:02:37 +0200
changeset 1191 b6e0b1811723
parent 1190 129d45eec68c
child 1320 88ce48689c14
permissions -rw-r--r--
Migrate to django 1.5 : - migrate the user profile - do some cleaning
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
719
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on Jul 30, 2012
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
1190
129d45eec68c Clean warning and errors for Django 1.5
ymh <ymh.work@gmail.com>
parents: 1181
diff changeset
     7
from haystack.backends import BaseEngine, elasticsearch_backend
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
     8
from haystack.exceptions import MissingDependency
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
     9
from haystack.utils import get_identifier
1191
b6e0b1811723 Migrate to django 1.5 :
ymh <ymh.work@gmail.com>
parents: 1190
diff changeset
    10
#from ldt.ldt_utils.models import Segment
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    11
import collections
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    12
try:
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    13
    import requests
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    14
except ImportError:
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    15
    raise MissingDependency("The 'elasticsearch' backend requires the installation of 'requests'.")
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    16
try:
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    17
    import pyelasticsearch
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    18
except ImportError:
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    19
    raise MissingDependency("The 'elasticsearch' backend requires the installation of 'pyelasticsearch'. Please refer to the documentation.")
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    20
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 719
diff changeset
    21
719
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
class ElasticsearchSearchBackend(elasticsearch_backend.ElasticsearchSearchBackend):
    '''
    Elasticsearch backend that customises highlighting and supports removing
    several documents in a single call.

    It extends haystack's stock ``ElasticsearchSearchBackend``:

    - ``build_search_kwargs`` replaces the ``highlight`` section built by the
      parent class with one that wraps matches in
      ``<span class='highlight'>`` tags.
    - ``remove`` accepts either a single object/identifier (delegated to the
      parent class) or a non-string iterable of them (deleted with one
      ``ids`` query).
    '''

    def build_search_kwargs(self, query_string, sort_by=None, start_offset=0, end_offset=None,
                        fields='', highlight=False, facets=None,
                        date_facets=None, query_facets=None,
                        narrow_queries=None, spelling_query=None,
                        within=None, dwithin=None, distance_point=None,
                        models=None, limit_to_registered_models=None,
                        result_class=None):
        '''
        Build the search arguments, overriding the highlighting definition.

        All parameters are forwarded unchanged to the parent implementation.
        When ``highlight`` is true, the ``highlight`` entry of the returned
        dict is replaced by a definition using ``<span class='highlight'>``
        as pre tag, ``</span>`` as post tag and ``number_of_fragments`` set
        to 0.

        The highlighted fields are ``tags``, ``title`` and ``abstract`` when
        ``models`` is ``None`` or empty. When ``models`` is given, the field
        list is left empty.

        Returns the (possibly modified) dict produced by the parent class.
        '''
        kwargs = super(ElasticsearchSearchBackend, self).build_search_kwargs(
            query_string, sort_by=sort_by, start_offset=start_offset, end_offset=end_offset,
            fields=fields, highlight=highlight, facets=facets,
            date_facets=date_facets, query_facets=query_facets,
            narrow_queries=narrow_queries, spelling_query=spelling_query,
            within=within, dwithin=dwithin, distance_point=distance_point,
            models=models, limit_to_registered_models=limit_to_registered_models,
            result_class=result_class)

        # TODO: try to make the list of fields dynamic.
        # TODO: how to handle multiple ... (the original note is truncated).
        if highlight:
            fields_def = {}

            # NOTE(review): a former "or Segment in models" condition was
            # commented out, so an explicit ``models`` argument currently
            # yields an empty field list - confirm this is intended.
            if not models:
                fields_def['tags'] = {}
                fields_def['title'] = {}
                fields_def['abstract'] = {}

            kwargs['highlight'] = {
                'pre_tags': ["<span class='highlight'>"],
                'post_tags': ["</span>"],
                "number_of_fragments": 0,
                'fields': fields_def,
            }

        return kwargs

    def remove(self, obj_or_string, commit=True):
        '''
        Remove one document, or a collection of documents, from the index.

        ``obj_or_string`` is either a single object/identifier, handled by
        the parent class, or a non-string iterable of such values, in which
        case every element is converted with ``get_identifier`` and all of
        them are deleted with a single ``ids`` query. An empty iterable is a
        no-op.

        ``commit`` is forwarded to the parent class for the single-document
        case; the bulk branch does not use it.

        Connection/HTTP errors are re-raised unless ``self.silently_fail`` is
        set, in which case they are logged and the method returns ``None``.
        '''
        if not self.setup_complete:
            try:
                self.setup()
            except (requests.RequestException, pyelasticsearch.ElasticHttpError) as e:
                if not self.silently_fail:
                    raise

                self.log.error("Failed to remove document '%s' from Elasticsearch: %s", repr(obj_or_string), e)
                return

        # Strings are iterable too, but they designate a single document.
        is_collection = (isinstance(obj_or_string, collections.Iterable)
                         and not isinstance(obj_or_string, basestring))
        if not is_collection:
            return super(ElasticsearchSearchBackend, self).remove(obj_or_string, commit=commit)

        ids = [get_identifier(elt) for elt in obj_or_string]
        if not ids:
            return

        q = {'ids': {'values': ids}}
        try:
            self.conn.delete_by_query(self.index_name, 'modelresult', q)
        except (requests.RequestException, pyelasticsearch.ElasticHttpError) as e:
            # Same failure policy as the setup step above: honour
            # ``silently_fail`` instead of always propagating the error.
            if not self.silently_fail:
                raise

            self.log.error("Failed to remove documents '%s' from Elasticsearch: %s", repr(ids), e)
719
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
1c0ac4068bbe improve highlighting management
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
class ElasticsearchSearchEngine(BaseEngine):
    '''
    Haystack engine pairing the customised backend defined in this module
    with haystack's stock Elasticsearch query class.
    '''
    # Backend class overriding highlighting and document removal.
    backend = ElasticsearchSearchBackend
    # Query class taken unchanged from haystack's Elasticsearch backend module.
    query = elasticsearch_backend.ElasticsearchSearchQuery