src/ldt/ldt/indexation/search_indexes.py
author ymh <ymh.work@gmail.com>
Tue, 22 Oct 2024 09:57:18 +0200
changeset 1516 9cfcfbac1a43
parent 1494 5e6295488e38
permissions -rw-r--r--
Added tag V01.65.08 for changeset c08d6aa5a51d
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
718
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
# -*- coding: utf-8 -*-
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
'''
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
Created on Jul 23, 2012
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
@author: ymh
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
'''
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
from haystack import indexes
1275
38c396f3ced8 Add indexation to content
ymh <ymh.work@gmail.com>
parents: 1117
diff changeset
     9
38c396f3ced8 Add indexation to content
ymh <ymh.work@gmail.com>
parents: 1117
diff changeset
    10
from ldt.ldt_utils.models import Segment, Content
718
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
from ldt.text.models import Annotation
1494
5e6295488e38 add ways to better specify the protocol
ymh <ymh.work@gmail.com>
parents: 1484
diff changeset
    12
import datetime
718
5e27a39d3742 replace lucene by haystack, remove references to lucene
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
1275
38c396f3ced8 Add indexation to content
ymh <ymh.work@gmail.com>
parents: 1117
diff changeset
    14
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 950
diff changeset
    15
class SegmentIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index declaration for ``Segment`` objects."""

    # Document field of the index; declared with use_template=True.
    text = indexes.CharField(document=True, use_template=True)

    # Identifier fields: declared stored=True but indexed=False.
    iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)
    project_id = indexes.CharField(model_attr='project_id', indexed=False, stored=True, null=True)
    ensemble_id = indexes.CharField(model_attr='ensemble_id', indexed=False, stored=True)
    cutting_id = indexes.CharField(model_attr='cutting_id', indexed=False, stored=True)
    element_id = indexes.CharField(model_attr='element_id', indexed=False, stored=True)

    # Descriptive fields.
    tags = indexes.CharField(model_attr='get_tags', stored=True)
    title = indexes.CharField(model_attr='title', stored=True)
    abstract = indexes.CharField(model_attr='abstract', stored=True)
    duration = indexes.IntegerField(model_attr='duration', indexed=False, stored=True)
    author = indexes.CharField(model_attr='author', stored=True, null=True)
    start_ts = indexes.IntegerField(model_attr='start_ts', indexed=False, stored=True)
    date = indexes.CharField(model_attr='date', stored=True)

    def get_model(self):
        """Return the model class this index is declared for (``Segment``)."""
        return Segment

    # def prepare_date(self, obj):
    #     return datetime.datetime.now().strftime("%Y-%m-%d"'T'"%H:%M:%S")

    def prepare_tags(self, obj):
        """Return the tag names of ``obj`` joined with commas.

        When ``obj`` has a ``tag_list`` attribute that is not ``None``, that
        list is joined directly; otherwise the names are read from
        ``obj.tags.all()``.
        """
        has_tag_list = hasattr(obj, 'tag_list') and obj.tag_list is not None
        if not has_tag_list:
            return ",".join(tag.name for tag in obj.tags.all())

        # Reset ``tags`` so that no second, useless db request is issued.
        obj.tags = None
        return ",".join(obj.tag_list)

    def index_queryset(self, using=None):
        """Return the queryset used when the entire index for the model is updated.

        The ``tags`` relation is prefetched.
        """
        model = self.get_model()
        return model.objects.prefetch_related("tags")
1477
1de00f96a047 Corrections for elasticsearch (server) >= 2.2 and elasticserach (client lib) >= 2.3 (+version)
ymh <ymh.work@gmail.com>
parents: 1373
diff changeset
    46
1484
5a8702a8adf0 Add content indexing + API endpoint for content search
ymh <ymh.work@gmail.com>
parents: 1477
diff changeset
    47
class ContentIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index declaration for ``Content`` objects.

    NOTE(review): this module defines a second class named ``ContentIndex``
    further down. The later ``class`` statement rebinds the module-level
    name, so after import ``ContentIndex`` no longer refers to this class.
    Confirm which of the two declarations is the intended one.
    """

    # Document field of the index; declared with use_template=True.
    text = indexes.CharField(document=True, use_template=True)

    # Identifier: declared stored=True but indexed=False.
    iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)

    # Descriptive fields.
    tags = indexes.CharField(model_attr='get_tags', stored=True)
    title = indexes.CharField(model_attr='title', stored=True)
    description = indexes.CharField(model_attr='description', stored=True)
    creation_date = indexes.DateTimeField(model_attr='creation_date', stored=True)
    update_date = indexes.DateTimeField(model_attr='update_date', stored=True)
    authors = indexes.CharField(model_attr="get_authors", stored=True)
    duration = indexes.IntegerField(model_attr="duration", stored=True)
    content_creation_date = indexes.DateTimeField(model_attr="content_creation_date", stored=True)

    def get_model(self):
        """Return the model class this index is declared for (``Content``)."""
        return Content

    def prepare_tags(self, obj):
        """Return the tag names of ``obj`` joined with commas.

        When ``obj`` has a ``tag_list`` attribute that is not ``None``, that
        list is joined directly; otherwise the names are read from
        ``obj.tags.all()``.
        """
        has_tag_list = hasattr(obj, 'tag_list') and obj.tag_list is not None
        if not has_tag_list:
            return ",".join(tag.name for tag in obj.tags.all())

        # Reset ``tags`` so that no second, useless db request is issued.
        obj.tags = None
        return ",".join(obj.tag_list)

    def index_queryset(self, using=None):
        """Return the queryset used when the entire index for the model is updated.

        The ``tags`` relation is prefetched.
        """
        model = self.get_model()
        return model.objects.prefetch_related("tags")
5a8702a8adf0 Add content indexing + API endpoint for content search
ymh <ymh.work@gmail.com>
parents: 1477
diff changeset
    73
5a8702a8adf0 Add content indexing + API endpoint for content search
ymh <ymh.work@gmail.com>
parents: 1477
diff changeset
    74
1477
1de00f96a047 Corrections for elasticsearch (server) >= 2.2 and elasticserach (client lib) >= 2.3 (+version)
ymh <ymh.work@gmail.com>
parents: 1373
diff changeset
    75
1117
3bab1e42acfa - update haystack
ymh <ymh.work@gmail.com>
parents: 950
diff changeset
    76
class AnnotationIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index declaration for ``Annotation`` objects."""

    # Document field of the index; declared with use_template=True.
    text = indexes.CharField(document=True, use_template=True)

    # Fields declared indexed=True; only ``title`` is also stored=True.
    tags = indexes.CharField(model_attr='tags', indexed=True, stored=False)
    title = indexes.CharField(model_attr='title', indexed=True, stored=True)
    # ``abstract`` reads the model's ``description`` attribute.
    abstract = indexes.CharField(model_attr='description', indexed=True, stored=False)
    # ``text_field`` reads the model's ``text`` attribute.
    text_field = indexes.CharField(model_attr='text', indexed=True, stored=False)

    # Identifier read from ``external_id``: stored=True but indexed=False.
    annotation_id = indexes.CharField(model_attr='external_id', indexed=False, stored=True)

    def get_model(self):
        """Return the model class this index is declared for (``Annotation``)."""
        return Annotation

    def prepare_tags(self, obj):
        """Return the names of ``obj.tags.all()`` joined with commas."""
        tag_names = (tag.name for tag in obj.tags.all())
        return ",".join(tag_names)

    def index_queryset(self, using=None):
        """Return the queryset used when the entire index for the model is updated.

        The ``tags`` relation is prefetched.
        """
        model = self.get_model()
        return model.objects.prefetch_related("tags")
1275
38c396f3ced8 Add indexation to content
ymh <ymh.work@gmail.com>
parents: 1117
diff changeset
    93
38c396f3ced8 Add indexation to content
ymh <ymh.work@gmail.com>
parents: 1117
diff changeset
    94
38c396f3ced8 Add indexation to content
ymh <ymh.work@gmail.com>
parents: 1117
diff changeset
    95
class ContentIndex(indexes.SearchIndex, indexes.Indexable):
    """Haystack search index declaration for ``Content`` objects.

    NOTE(review): a class with the same name ``ContentIndex`` is already
    defined earlier in this module. This later ``class`` statement rebinds
    the module-level name, so the earlier declaration is no longer reachable
    as ``ContentIndex`` after import. Confirm which of the two declarations
    is the intended one.
    """

    # Document field of the index; declared with use_template=True.
    text = indexes.CharField(document=True, use_template=True)

    # Fields declared indexed=True; only ``title`` is also stored=True.
    tags = indexes.CharField(model_attr='get_tags', indexed=True, stored=False, null=True)
    title = indexes.CharField(model_attr='title', indexed=True, stored=True)
    # ``abstract`` reads the model's ``description`` attribute.
    abstract = indexes.CharField(model_attr='description', indexed=True, stored=False, null=True)

    def get_model(self):
        """Return the model class this index is declared for (``Content``)."""
        return Content

    def prepare_tags(self, obj):
        """Return the names of ``obj.tags.all()`` joined with commas."""
        tag_names = (tag.name for tag in obj.tags.all())
        return ",".join(tag_names)

    def index_queryset(self, using=None):
        """Return the queryset used when the entire index for the model is updated.

        The ``tags`` relation is prefetched.
        """
        model = self.get_model()
        return model.objects.prefetch_related("tags")