| author | cavaliet |
| Tue, 06 Nov 2012 13:56:54 +0100 | |
| changeset 895 | e76df6d34e6f |
| parent 725 | 4f4005df9a97 |
| child 1117 | 3bab1e42acfa |
| permissions | -rw-r--r-- |
| 77 | 1 |
from django.conf import settings |
2 |
||
|
718
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
3 |
from haystack.query import SearchQuerySet |
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
4 |
from ldt.indexation.highlighter import LdtHighlighter as Highlighter |
| 725 | 5 |
from ldt.indexation.query_parser import QueryParser |
|
718
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
6 |
from ldt.ldt_utils.models import Segment |
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
7 |
from ldt.text.models import Annotation |
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
8 |
import re |
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
9 |
import sys |
| 719 | 10 |
|
11 |
def get_results_with_context(field, query, content_list=None, highlight=True):
    """Return the stored fields of matching hits, enriched with search metadata.

    The hits come from ``get_results_list(field, query, highlight)``.  When
    ``content_list`` is given, only hits whose stored ``iri_id`` equals the
    ``iri_id`` of one of its items are kept.

    Each returned dict is the hit's stored-fields dict with these keys added:
    ``score`` (hit score), ``indexation_id`` (hit pk), ``context`` (copy of
    the stored ``abstract``) and ``highlighted`` (the hit's ``highlighted``
    attribute).
    """
    hits = get_results_list(field, query, highlight)

    # None means "no restriction"; otherwise the iri_ids we accept.
    if content_list is None:
        allowed_iri_ids = None
    else:
        allowed_iri_ids = [content.iri_id for content in content_list]

    documents = []
    for hit in hits:
        stored = hit.get_stored_fields()
        if allowed_iri_ids is not None and stored.get("iri_id") not in allowed_iri_ids:
            # Hit belongs to a content outside the requested list.
            continue
        stored["score"] = hit.score
        stored["indexation_id"] = hit.pk
        stored["context"] = stored["abstract"]
        stored["highlighted"] = hit.highlighted
        documents.append(stored)
    return documents
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
28 |
|
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
29 |
|
| 176 | 30 |
|
|
718
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
31 |
def get_results_list(field, query, highlight=True):
    """Build the search query set over ``Segment`` for ``query`` on ``field``.

    The pseudo-field ``'all'`` is searched through the ``'text'`` field.
    ``query`` is parsed by ``QueryParser`` and the parsed result is used as
    the filter.  When ``highlight`` is true, the query set returned is the
    one produced by ``.highlight()``.
    """
    search_field = 'text' if field == 'all' else field

    parser = QueryParser(search_field)
    segments = SearchQuerySet().models(Segment).filter(parser.parse(query))

    return segments.highlight() if highlight else segments
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
42 |
|
|
5e27a39d3742
replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
716
diff
changeset
|
43 |
|
| 176 | 44 |
|
| 716 | 45 |
def get_result_text(field, query):
    """Search ``Annotation`` objects and return a summary dict for each hit.

    Field name mapping: ``'all'`` is searched through ``'text'`` and
    ``'text'`` through ``'text_field'``; any other name is used as given.

    Returns a list of dicts with the keys ``external_id`` and ``title``
    (both read from the hit's stored fields) and ``score``.
    """
    if field == 'all':
        field = 'text'
    elif field == 'text':
        field = 'text_field'

    qp = QueryParser(field)
    # SearchQuerySet must be instantiated before chaining: calling
    # ``SearchQuerySet.models(...)`` on the class itself passes ``Annotation``
    # in place of ``self``.
    qs = SearchQuerySet().models(Annotation).filter(qp.parse(query))

    results = []
    for res in qs:
        # Fetch the stored fields once per hit instead of once per key.
        stored = res.get_stored_fields()
        results.append({
            'external_id': stored['external_id'],
            'title': stored['title'],
            'score': res.score,
        })
    return results
| 716 | 56 |
|
|
452
8e9494006e7b
segment abstracts + content images can be retrieved directly from search results page
verrierj
parents:
349
diff
changeset
|
57 |
def _first_fragment(highlighted, key, fallback):
    """Return the first highlighted fragment stored under ``key``.

    ``fallback`` is returned when ``key`` is missing or maps to an empty
    value, so a hit without fragments for one field does not raise.
    """
    fragments = highlighted.get(key)
    return fragments[0] if fragments else fallback


def highlight_documents(results_list, query, field):
    """Set highlighted ``context``, ``title`` and ``context_tags`` on segments.

    ``results_list`` is iterated as a sequence of projects; each project's
    ``'list'`` entry is iterated as its segments.  For every segment:

    * if it carries a non-empty ``highlighted`` attribute, the first fragment
      for ``abstract``, ``tags`` and ``title`` is used, falling back to the
      segment's own attribute when a fragment is not available;
    * otherwise the three attributes are highlighted with ``Highlighter``.

    ``field`` is not used by this function.  The segments are modified in
    place and ``results_list`` is returned.
    """
    # sys.maxsize is available on Python 2.6+ and Python 3, whereas
    # sys.maxint only exists on Python 2.
    highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxsize)

    for project in results_list:
        for segment in project['list']:
            if hasattr(segment, "highlighted") and segment.highlighted:
                #TODO :
                highlighted_text = {
                    "context": _first_fragment(segment.highlighted, 'abstract', segment.abstract),
                    "tags": _first_fragment(segment.highlighted, 'tags', segment.tags),
                    'title': _first_fragment(segment.highlighted, 'title', segment.title),
                }
            else:
                highlighted_text = {
                    "context": highlight.highlight(segment.abstract),
                    "tags": highlight.highlight(segment.tags),
                    'title': highlight.highlight(segment.title),
                }

            segment.context = highlighted_text['context']
            segment.title = highlighted_text['title']
            tags = highlighted_text['tags']
            # Keep what follows the first ';'.  When there is none, find()
            # returns -1 and the slice starts at 0, keeping the whole string.
            segment.context_tags = tags[tags.find(';')+1:]

    return results_list
| 716 | 84 |
|
| 719 | 85 |
class SimpleSearch(object):
    """Small search facade returning the stored fields of each hit."""

    def query(self, field, query):
        """Return the stored-fields dict of every hit for ``query`` on ``field``.

        The hits come from ``get_results_list(field, query)``.
        """
        return [hit.get_stored_fields() for hit in get_results_list(field, query)]

    def query_all(self, query):
        """Run ``query`` against the ``"all"`` pseudo-field."""
        return self.query("all", query)
|
| 716 | 97 |
|
98 |
||
|
715
f21459554182
Remove lucene dependancies in model
ymh <ymh.work@gmail.com>
parents:
602
diff
changeset
|
99 |
|
|
f21459554182
Remove lucene dependancies in model
ymh <ymh.work@gmail.com>
parents:
602
diff
changeset
|
100 |
|
| 176 | 101 |
|
102 |
||
103 |