| author | ymh <ymh.work@gmail.com> |
| Wed, 13 May 2015 02:07:46 +0200 | |
| changeset 1371 | 17b7a6d9959d |
| parent 1300 | 7a638196577d |
| child 1484 | 5a8702a8adf0 |
| permissions | -rw-r--r-- |
| 1275 | 1 |
import re |
2 |
import sys |
|
3 |
||
| 77 | 4 |
from django.conf import settings |
| 1117 | 5 |
from haystack import connections |
6 |
from haystack.constants import DEFAULT_ALIAS |
|
|
718
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
7 |
from haystack.query import SearchQuerySet |
| 1275 | 8 |
|
|
718
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
9 |
from ldt.indexation.highlighter import LdtHighlighter as Highlighter |
| 725 | 10 |
from ldt.indexation.query_parser import QueryParser |
| 1275 | 11 |
|
12 |
from .backends import elasticsearch_backend as ldt_elasticsearch_backend |
|
13 |
||
| 1117 | 14 |
|
| 1275 | 15 |
def get_results_with_context(model, field, query, content_list=None, highlight=True):
    """
    Search ``model`` on ``field`` and return the stored fields of each hit.

    The search itself is delegated to ``get_results_list``.  Every returned
    item is the hit's stored-field dict, enriched with:

    * ``score``: the hit's score,
    * ``indexation_id``: the hit's ``pk``,
    * ``context``: the stored ``abstract`` (``""`` when absent),
    * ``highlighted``: the hit's ``highlighted`` attribute.

    When ``content_list`` is not ``None``, only the hits whose stored
    ``iri_id`` equals the ``iri_id`` of one of its items are kept.
    """
    hits = get_results_list(model, field, query, highlight)

    # None means "no restriction"; an empty content_list filters out every hit.
    allowed_iri_ids = None
    if content_list is not None:
        allowed_iri_ids = [content.iri_id for content in content_list]

    contexts = []
    for hit in hits:
        stored = hit.get_stored_fields()
        if allowed_iri_ids is not None and stored.get("iri_id") not in allowed_iri_ids:
            continue
        stored["score"] = hit.score
        stored["indexation_id"] = hit.pk
        stored["context"] = stored.get("abstract", "")
        stored["highlighted"] = hit.highlighted
        contexts.append(stored)
    return contexts
|
718
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
32 |
|
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
33 |
|
| 176 | 34 |
|
| 1275 | 35 |
def get_results_list(model, field, query, highlight=True):
    """
    Build the search query set for ``query`` restricted to ``model``.

    ``field`` is handed to ``QueryParser``; the special value ``'all'`` is
    replaced by ``'text'`` first.  When ``highlight`` is true the query set
    returned is the result of calling ``highlight()`` on it.
    """
    search_field = 'text' if field == 'all' else field

    parser = QueryParser(search_field)
    results = SearchQuerySet().models(model).filter(parser.parse(query))

    return results.highlight() if highlight else results
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
46 |
|
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
47 |
|
| 176 | 48 |
|
| 716 | 49 |
def get_result_text(field, query):
    """
    Search the indexed ``Annotation`` objects and summarise each hit.

    ``field`` is remapped before being given to ``QueryParser``: ``'all'``
    becomes ``'text'`` and ``'text'`` becomes ``'text_field'``; any other
    value is used unchanged.

    Returns a list of dicts with the keys ``external_id`` and ``title``
    (both read from the hit's stored fields) and ``score``.  A ``KeyError``
    is raised if a hit has no stored ``external_id`` or ``title``.
    """
    # put import here to avoid a circular dependency
    from ldt.text.models import Annotation

    if field == 'all':
        field = 'text'
    elif field == 'text':
        field = 'text_field'

    qp = QueryParser(field)
    # ``models`` is an instance method: the query set has to be instantiated
    # (calling it on the class itself fails).
    qs = SearchQuerySet().models(Annotation).filter(qp.parse(query))

    results = []
    for res in qs:
        # fetch the stored fields once per hit instead of once per key
        stored = res.get_stored_fields()
        results.append({
            'external_id': stored['external_id'],
            'title': stored['title'],
            'score': res.score,
        })
    return results
| 716 | 63 |
|
|
452
8e9494006e7b
segment abstracts + content images can be retrieved directly from search results page
verrierj
parents:
349
diff
changeset
|
64 |
def highlight_documents(results_list, query, field):
    """
    Set highlighted ``context``, ``title`` and ``context_tags`` on segments.

    ``results_list`` is an iterable of mappings whose ``'list'`` entry holds
    the segments.  For every segment:

    * if it has a truthy ``highlighted`` attribute, the first element of its
      ``abstract`` / ``tags`` / ``title`` entries is used, falling back to
      the segment's own ``abstract``, ``get_tags()`` and ``title``;
    * otherwise those three values are run through the highlighter built
      from ``query``.

    The segment is then updated in place: ``context`` and ``title`` are
    overwritten and ``context_tags`` receives the tags text located after
    its first ``';'``.

    ``field`` is accepted for interface compatibility but is not used.

    Returns ``results_list`` (the same object, with its segments mutated).
    """
    # sys.maxsize is available on Python 2.6+ and Python 3, whereas
    # sys.maxint only exists on Python 2.
    # NOTE(review): presumably a huge max_length keeps the highlighter from
    # truncating the text -- confirm against LdtHighlighter.
    highlighter = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxsize)

    for project in results_list:
        for segment in project['list']:
            if hasattr(segment, "highlighted") and segment.highlighted:
                # TODO: (left empty by the original author)
                highlighted_text = {
                    "context": segment.highlighted.get('abstract', [segment.abstract])[0],
                    "tags": segment.highlighted.get('tags', [segment.get_tags()])[0],
                    'title': segment.highlighted.get('title', [segment.title])[0],
                }
            else:
                highlighted_text = {
                    "context": highlighter.highlight(segment.abstract),
                    "tags": highlighter.highlight(segment.get_tags()),
                    'title': highlighter.highlight(segment.title),
                }

            segment.context = highlighted_text['context']
            segment.title = highlighted_text['title']
            tags = highlighted_text['tags']
            # find() returns -1 when there is no ';', so the slice then
            # starts at 0 and the whole tags text is kept.
            segment.context_tags = tags[tags.find(';') + 1:]

    return results_list
| 716 | 91 |
|
| 1117 | 92 |
def object_delete(model, **kwargs):
    """
    Delete the ``model`` rows matching ``kwargs`` and remove them from the index.

    All keyword arguments are used as queryset filters, except ``using``
    which names the search connection alias (``DEFAULT_ALIAS`` when missing
    or falsy).

    When the connection is the project's ``ElasticsearchSearchEngine`` its
    backend receives the whole list of objects in a single ``remove`` call;
    for any other engine ``remove`` is called once per object.
    """
    lookup = dict(kwargs)
    lookup.pop('using', None)

    # Here we do a poor man's transaction management.
    # There is no clear transaction management in Haystack.
    # Therefore, we give priority to the database and delete there first.
    # If there is an error there, the index will not be updated.

    # materialise the objects before deleting them: they are still needed
    # afterwards to clean the index
    doomed = list(model.objects.filter(**lookup))

    model.objects.filter(**lookup).delete()

    alias = kwargs.get('using') or DEFAULT_ALIAS
    engine = connections[alias]

    if not isinstance(engine, ldt_elasticsearch_backend.ElasticsearchSearchEngine):
        for obj in doomed:
            engine.get_backend().remove(obj, commit=True)
        return

    engine.get_backend().remove(doomed, commit=True)
|
120 |
||
| 1300 | 121 |
|
122 |
||
| 1117 | 123 |
def object_insert(model, object_list, func_key, using=None):
    """
    Bulk-create ``object_list`` and set the database ``id`` on each object.

    After ``bulk_create`` the ids are read back with a query on
    ``func_key`` (``<func_key>__in``) and assigned to the objects according
    to their own ``func_key`` value.  Nothing is done for an empty
    ``object_list``.

    ``using`` is accepted but not used by this function.

    Raises ``KeyError`` if an object's ``func_key`` value is not found by
    the read-back query.
    """
    if object_list:
        model.objects.bulk_create(object_list)

        key_values = [getattr(obj, func_key) for obj in object_list]
        lookup = {func_key + '__in': key_values}
        id_by_key = dict(model.objects.filter(**lookup).values_list(func_key, "id"))

        for obj in object_list:
            obj.id = id_by_key[getattr(obj, func_key)]
|
133 |
||
| 1300 | 134 |
|
135 |
def object_run_index(model, object_list, using=None):
    """
    Push ``object_list`` to the search index registered for ``model``.

    ``using`` is the search connection alias; ``DEFAULT_ALIAS`` is used when
    it is missing or falsy.  Nothing is done for an empty ``object_list``.
    """
    if not object_list:
        return

    conn = connections[using or DEFAULT_ALIAS]

    backend = conn.get_backend()
    index = conn.get_unified_index().get_index(model)

    backend.update(index, object_list)
|
| 1300 | 151 |
|
| 1117 | 152 |
|
153 |
||
| 719 | 154 |
class SimpleSearch(object):
    """Small helper returning the stored fields of search hits as lists."""

    def query(self, model, field, query):
        """
        Search ``model`` on ``field`` for ``query``.

        The search is delegated to ``get_results_list`` (with its default
        highlighting).  Returns the list of the stored fields of every hit.
        """
        return [hit.get_stored_fields() for hit in get_results_list(model, field, query)]

    def query_all(self, query, model=None):
        """
        Search ``model`` for ``query`` using the ``"all"`` field.

        ``model`` is mandatory in practice: ``query`` needs it.  It is
        declared after ``query`` with a default only to keep the historical
        ``query_all(query)`` signature; that form never worked once
        ``query`` required a model, and still raises ``TypeError``.
        """
        if model is None:
            raise TypeError("query_all() requires the model to search: query_all(query, model=...)")
        return self.query(model, "all", query)
|
| 716 | 166 |
|
167 |
||
|
715
f21459554182
Remove lucene dependancies in model
ymh <ymh.work@gmail.com>
parents:
602
diff
changeset
|
168 |
|
|
f21459554182
Remove lucene dependancies in model
ymh <ymh.work@gmail.com>
parents:
602
diff
changeset
|
169 |
|
| 176 | 170 |
|
171 |
||
172 |