web/ldt/ldt_utils/contentindexer.py
author ymh <ymh.work@gmail.com>
Mon, 13 Dec 2010 23:55:19 +0100
changeset 22 83b28fc0d731
parent 1 3a30d255c235
permissions -rw-r--r--
improve on ldt test framework start migration for text test
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
1
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     1
import tempfile
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     2
import os
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     3
import os.path
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     4
import shutil
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     5
from ldt.utils import zipfileext
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     6
import urllib
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     7
import ldt.utils.xml
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     8
from django.conf import settings
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
     9
from models import Content
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    10
import fnmatch
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    11
import uuid
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    12
import shutil
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    13
import lucene
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    14
from ldt.ldt_utils import STORE
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    15
from ldt.ldt_utils import ANALYZER
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    16
import lxml.etree
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    17
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    18
def Property(func):
    """Build a ``property`` from the keyword arguments returned by ``func``.

    ``func`` is called with no arguments and must return a mapping whose
    keys are accepted by the built-in ``property`` (``fget``, ``fset``,
    ``fdel``, ``doc``); callers in this file return ``locals()``.
    """
    accessors = func()
    return property(**accessors)
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    20
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    21
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
    22
class ContentIndexer(object):
    """Index the segments of a list of contents into a Lucene index.

    For each content, the XML document found at ``content.iri_url()`` is
    parsed; every ``element`` of every non-blacklisted ``decoupage`` yields
    one Lucene document (added through ``writer``) and one ``Segment`` row.
    """

    def __init__(self, contentList, writer, decoupage_blackList = settings.DECOUPAGE_BLACKLIST):
        """Store the contents to index, the Lucene writer and the blacklist.

        :param contentList: iterable of objects exposing ``iri_url()`` and
            ``iri_id`` (see ``index_content``).
        :param writer: object exposing ``deleteDocuments``, ``addDocument``
            and ``commit``.
        :param decoupage_blackList: decoupage ids to skip; ``None`` is
            treated as an empty blacklist by the property getter.
        """
        self.__contentList = contentList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so that
            # membership tests always work.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(node, path, separator=""):
        """Join the strings selected by the XPath ``path`` under ``node``."""
        # lxml returns XPath ``text()`` results as string objects, so they
        # are joined directly (they have no ``.text()`` method).
        return separator.join(node.xpath(path))

    @classmethod
    def _element_tags(cls, element_node):
        """Return the tags of ``element_node`` as a single string.

        The ``tags`` attribute is used when present and non-empty (commas
        replaced by semicolons); otherwise the text of the ``tag`` children,
        then of the ``tags/tag`` descendants, joined with `` ; ``.
        """
        tags = element_node.get(u"tags")
        if tags:
            # str.replace returns a new string; the result must be kept.
            return tags.replace(",", ";")
        for path in ("tag/text()", "tags/tag/text()"):
            tags = cls._joined_text(element_node, path, " ; ")
            if tags:
                return tags
        return ""

    @staticmethod
    def _int_attribute(node, name, default=-1):
        """Return attribute ``name`` of ``node`` as an int, or ``default``."""
        try:
            return int(float(node.get(name, default)))
        except (TypeError, ValueError):
            return default

    def index_all(self):
        """Index every content given to the constructor, in order."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """(Re)index one content and commit the writer.

        Existing Lucene documents whose ``iri_id`` term equals
        ``content.iri_id`` are deleted first.

        :param content: object exposing ``iri_url()`` and ``iri_id``.
        """
        # Only ``Content`` is imported at module level; import ``Segment``
        # here so the name is defined.
        # NOTE(review): assumes Segment lives next to Content in ``models``.
        from models import Segment

        # etree.fromstring() needs a string; parse the open handle instead
        # and make sure it is closed.
        iri_file = urllib.urlopen(content.iri_url())
        try:
            iri_doc = lxml.etree.parse(iri_file)
        finally:
            iri_file.close()

        self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
        # NOTE(review): Segment rows from a previous run are not removed
        # here, unlike the Lucene documents - confirm whether re-indexing
        # should clear them first.

        for ensemble in iri_doc.xpath("/iri/body/ensembles/ensemble"):
            # lxml's Element.get takes the attribute name first.
            ensemble_id = ensemble.get(u"id")

            for decoupage_node in ensemble:
                if decoupage_node.tag != "decoupage":
                    continue
                decoup_id = decoupage_node.get(u"id")
                if decoup_id in self.decoupage_blacklist:
                    continue

                for element_node in decoupage_node.xpath("elements/element"):
                    element_id = element_node.get(u"id")
                    tags = self._element_tags(element_node)
                    title = self._joined_text(element_node, "title/text()")
                    abstract = self._joined_text(element_node, "abstract/text()")

                    # NOTE(review): the original referenced these names
                    # without defining them. They are read from attributes
                    # assumed to be named author/begin/dur/date - confirm
                    # against the IRI file format.
                    author = element_node.get(u"author", u"")
                    start_ts = self._int_attribute(element_node, u"begin")
                    duration = self._int_attribute(element_node, u"dur")
                    date_str = element_node.get(u"date", u"")

                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensemble_id, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoup_id, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", element_id, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    seg = Segment(content=content,
                                  iri_id=content.iri_id,
                                  ensemble_id=ensemble_id,
                                  cutting_id=decoup_id,
                                  element_id=element_id,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()

                    self.__writer.addDocument(lucene_doc)

        self.__writer.commit()
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
   125
            
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
   126
            
3a30d255c235 First version of API with tests
wakimd
parents:
diff changeset
   127
class ProjectIndexer(object):
    """Index the segments of a list of projects into a Lucene index.

    For each project, the XML held in ``project.ldt`` is parsed; every
    ``element`` of every non-blacklisted ``decoupage`` under
    ``/iri/annotations/content`` yields one Lucene document (added through
    ``writer``) and one ``Segment`` row.
    """

    def __init__(self, projectList, writer, decoupage_blackList = settings.DECOUPAGE_BLACKLIST):
        """Store the projects to index, the Lucene writer and the blacklist.

        :param projectList: iterable of objects exposing ``ldt`` and
            ``iri_id`` (see ``index_project``).
        :param writer: object exposing ``deleteDocuments``, ``addDocument``
            and ``commit``.
        :param decoupage_blackList: decoupage ids to skip; ``None`` is
            treated as an empty blacklist by the property getter.
        """
        self.__projectList = projectList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """get blacklist""" #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so that
            # membership tests always work.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(node, path, separator=""):
        """Join the strings selected by the XPath ``path`` under ``node``."""
        # lxml returns XPath ``text()`` results as string objects, so they
        # are joined directly (they have no ``.text()`` method).
        return separator.join(node.xpath(path))

    @classmethod
    def _element_tags(cls, element_node):
        """Return the tags of ``element_node`` as a single string.

        The ``tags`` attribute is used when present and non-empty (commas
        replaced by semicolons); otherwise the text of the ``tag`` children,
        then of the ``tags/tag`` descendants, joined with `` ; ``.
        """
        tags = element_node.get(u"tags")
        if tags:
            # str.replace returns a new string; the result must be kept.
            return tags.replace(",", ";")
        for path in ("tag/text()", "tags/tag/text()"):
            tags = cls._joined_text(element_node, path, " ; ")
            if tags:
                return tags
        return ""

    @staticmethod
    def _int_attribute(node, name, default=-1):
        """Return attribute ``name`` of ``node`` as an int, or ``default``."""
        try:
            return int(float(node.get(name, default)))
        except (TypeError, ValueError):
            return default

    def index_all(self):
        """Index every project given to the constructor, in order."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """(Re)index one project and commit the writer.

        Existing Lucene documents whose ``project_id`` term equals
        ``project.iri_id`` are deleted first.

        :param project: object exposing ``ldt`` (XML string) and ``iri_id``.
        """
        # Only ``Content`` is imported at module level; import ``Segment``
        # here so the name is defined.
        # NOTE(review): assumes Segment lives next to Content in ``models``.
        from models import Segment

        ldt_doc = lxml.etree.fromstring(project.ldt)

        # Documents of a project carry its id in "project_id" ("iri_id"
        # holds the content id), so stale entries must be deleted on that
        # term; the original deleted on "iri_id".
        self.__writer.deleteDocuments(lucene.Term("project_id", project.iri_id))

        for content_node in ldt_doc.xpath("/iri/annotations/content"):
            # lxml's Element.get takes the attribute name first.
            content_id = content_node.get(u"id")

            # The original passed the XML node itself to Segment and read
            # ``iri_id`` from it, which an lxml element does not have.
            # NOTE(review): assumes Content is a Django model with an
            # ``iri_id`` field; a missing row raises Content.DoesNotExist -
            # confirm this is the desired behaviour.
            content = Content.objects.get(iri_id=content_id)

            ensemble_id = "ens_perso"

            for decoupage_node in content_node:
                if decoupage_node.tag != "decoupage":
                    continue
                decoup_id = decoupage_node.get(u"id")
                if decoup_id in self.decoupage_blacklist:
                    continue

                for element_node in decoupage_node.xpath("elements/element"):
                    element_id = element_node.get(u"id")
                    tags = self._element_tags(element_node)
                    title = self._joined_text(element_node, "title/text()")
                    abstract = self._joined_text(element_node, "abstract/text()")

                    # NOTE(review): the original referenced these names
                    # without defining them. They are read from attributes
                    # assumed to be named author/begin/dur/date - confirm
                    # against the LDT file format.
                    author = element_node.get(u"author", u"")
                    start_ts = self._int_attribute(element_node, u"begin")
                    duration = self._int_attribute(element_node, u"dur")
                    date_str = element_node.get(u"date", u"")

                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("iri_id", content_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensemble_id, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoup_id, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", element_id, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    seg = Segment(content=content,
                                  iri_id=content_id,
                                  ensemble_id=ensemble_id,
                                  cutting_id=decoup_id,
                                  element_id=element_id,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()

                    self.__writer.addDocument(lucene_doc)

        self.__writer.commit()