web/ldt/ldt_utils/contentindexer.py
author wakimd
Fri, 15 Oct 2010 12:38:52 +0200
changeset 95 9bae869b2146
parent 94 9927a619d2b5
permissions -rw-r--r--
Merge
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
0
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
import tempfile
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
import os
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
import os.path
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
import shutil
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
from ldt.utils import zipfileext
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
import urllib
91
9c83809fda01 migration static elements
wakimd
parents: 62
diff changeset
     7
# import ldt.utils.log
0
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
import ldt.utils.xml
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
from django.conf import settings
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
from models import Content
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
import fnmatch
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
import uuid
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
import shutil
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
import lucene
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
from ldt.ldt_utils import STORE
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
from ldt.ldt_utils import ANALYZER
94
9927a619d2b5 Merge and corrections due to merge
wakimd
parents: 91
diff changeset
    17
import lxml.etree
0
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
def Property(func):
    """Build a ``property`` from the mapping returned by ``func``.

    ``func`` is called once with no arguments; the mapping it returns is
    passed to ``property`` as keyword arguments (``fget``, ``fset``,
    ``fdel``, ``doc``).  This lets a class declare a property's accessors
    as nested functions and end the declaration with ``return locals()``.
    """
    accessors = func()
    return property(**accessors)
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
class ContentIndexer(object):
    """Index the annotated elements of content IRI documents into Lucene.

    For each content, the XML document found at ``content.iri_url()`` is
    parsed and one Lucene document is written per ``elements/element``
    node of every ``decoupage`` that is not blacklisted.
    """

    def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Remember what to index and where to write it.

        contentList -- iterable of content objects; ``iri_url()`` and
            ``iri_id`` are read from each of them.
        writer -- index writer; ``deleteDocuments``, ``addDocument`` and
            ``commit`` are called on it.
        decoupage_blackList -- ids of the decoupages to skip; ``None`` is
            treated as "no blacklist" by the ``decoupage_blacklist``
            property.
        """
        self.__contentList = contentList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist():  #@NoSelf
        doc = """get blacklist"""  #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so callers
            # can always use the ``in`` operator on it.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(node, path, prefix=""):
        """Concatenate the strings selected by XPath ``path`` on ``node``.

        Each selected string is preceded by ``prefix``.  The XPath is
        expected to end in ``text()`` so that the results are strings.
        """
        return "".join([prefix + text for text in node.xpath(path)])

    @classmethod
    def _element_tags(cls, elementNode):
        """Return the tag string of an element node.

        The ``tags`` attribute is used when it is non-empty (commas are
        turned into semicolons); otherwise the text of the ``tag`` children
        is used, then the text of the ``tags/tag`` descendants.  Tags read
        from child nodes are each preceded by ``" ; "``.
        """
        tags = elementNode.get("tags")
        if tags:
            # str.replace returns a new string; the result must be kept.
            return tags.replace(",", ";")

        tags = cls._joined_text(elementNode, "tag/text()", " ; ")
        if not tags:
            tags = cls._joined_text(elementNode, "tags/tag/text()", " ; ")
        return tags

    def index_all(self):
        """Index every content given to the constructor."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """Re-index a single content.

        Documents whose ``iri_id`` term equals ``content.iri_id`` are
        deleted first, then one document per element is added and the
        writer is committed.
        """
        url = content.iri_url()
        # lxml.etree.fromstring() needs the document itself, not the
        # file-like object returned by urlopen; read it and always release
        # the handle.
        iri_file = urllib.urlopen(url)
        try:
            iri_doc = lxml.etree.fromstring(iri_file.read())
        finally:
            iri_file.close()

        self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))

        for ensemble in iri_doc.xpath("/iri/body/ensembles/ensemble"):
            ensembleId = ensemble.get("id")

            for decoupageNode in ensemble:
                if decoupageNode.tag != "decoupage":
                    continue
                decoupId = decoupageNode.get("id")
                if decoupId in self.decoupage_blacklist:
                    continue

                for elementNode in decoupageNode.xpath("elements/element"):
                    elementId = elementNode.get("id")
                    tags = self._element_tags(elementNode)
                    title = self._joined_text(elementNode, "title/text()")
                    abstract = self._joined_text(elementNode, "abstract/text()")

                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    # NOTE(review): ``Segment``, ``duration``, ``author``,
                    # ``start_ts`` and ``date_str`` are neither imported nor
                    # assigned in the visible part of this file, so this
                    # call raises NameError unless they are provided
                    # elsewhere.  Left unchanged because their intended
                    # source cannot be determined from here -- confirm.
                    seg = Segment(content=content,
                                  iri_id=content.iri_id,
                                  ensemble_id=ensembleId,
                                  cutting_id=decoupId,
                                  element_id=elementId,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()

                    self.__writer.addDocument(lucene_doc)

        self.__writer.commit()
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
            
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
            
cc4a51750724 first commit
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
class ProjectIndexer(object):
    """Index the annotated elements of project LDT documents into Lucene.

    For each project, the XML held in ``project.ldt`` is parsed and one
    Lucene document is written per ``elements/element`` node of every
    non-blacklisted ``decoupage`` found under ``/iri/annotations/content``.
    """

    def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
        """Remember what to index and where to write it.

        projectList -- iterable of project objects; ``ldt`` and ``iri_id``
            are read from each of them.
        writer -- index writer; ``deleteDocuments``, ``addDocument`` and
            ``commit`` are called on it.
        decoupage_blackList -- ids of the decoupages to skip; ``None`` is
            treated as "no blacklist" by the ``decoupage_blacklist``
            property.
        """
        self.__projectList = projectList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist():  #@NoSelf
        doc = """get blacklist"""  #@UnusedVariable

        def fget(self):
            # Normalise a missing blacklist to an empty tuple so callers
            # can always use the ``in`` operator on it.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(node, path, prefix=""):
        """Concatenate the strings selected by XPath ``path`` on ``node``.

        Each selected string is preceded by ``prefix``.  The XPath is
        expected to end in ``text()`` so that the results are strings.
        """
        return "".join([prefix + text for text in node.xpath(path)])

    @classmethod
    def _element_tags(cls, elementNode):
        """Return the tag string of an element node.

        The ``tags`` attribute is used when it is non-empty (commas are
        turned into semicolons); otherwise the text of the ``tag`` children
        is used, then the text of the ``tags/tag`` descendants.  Tags read
        from child nodes are each preceded by ``" ; "``.
        """
        tags = elementNode.get("tags")
        if tags:
            # str.replace returns a new string; the result must be kept.
            return tags.replace(",", ";")

        tags = cls._joined_text(elementNode, "tag/text()", " ; ")
        if not tags:
            tags = cls._joined_text(elementNode, "tags/tag/text()", " ; ")
        return tags

    def index_all(self):
        """Index every project given to the constructor."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """Re-index a single project.

        Documents previously written for the project are deleted first,
        then one document per element is added and the writer is
        committed.
        """
        ldt_doc = lxml.etree.fromstring(project.ldt)

        # Documents written below carry the project id in the "project_id"
        # field, while "iri_id" holds the id of the content; deleting on
        # "project_id" is what removes this project's stale documents.
        self.__writer.deleteDocuments(lucene.Term("project_id", project.iri_id))

        for content in ldt_doc.xpath("/iri/annotations/content"):
            contentId = content.get("id")
            ensembleId = "ens_perso"

            for decoupageNode in content:
                if decoupageNode.tag != "decoupage":
                    continue
                decoupId = decoupageNode.get("id")
                if decoupId in self.decoupage_blacklist:
                    continue

                for elementNode in decoupageNode.xpath("elements/element"):
                    elementId = elementNode.get("id")
                    tags = self._element_tags(elementNode)
                    title = self._joined_text(elementNode, "title/text()")
                    abstract = self._joined_text(elementNode, "abstract/text()")

                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    # ``content`` is the XML <content> element here and has
                    # no ``iri_id`` attribute, so the id read from its "id"
                    # XML attribute is used, as for the Lucene field above.
                    # NOTE(review): ``Segment``, ``duration``, ``author``,
                    # ``start_ts`` and ``date_str`` are neither imported nor
                    # assigned in the visible part of this file, so this
                    # call raises NameError unless they are provided
                    # elsewhere; ``content=content`` also passes the XML
                    # element, which is presumably not what ``Segment``
                    # expects -- confirm before relying on this.
                    seg = Segment(content=content,
                                  iri_id=contentId,
                                  ensemble_id=ensembleId,
                                  cutting_id=decoupId,
                                  element_id=elementId,
                                  tags=tags,
                                  title=title,
                                  abstract=abstract,
                                  duration=duration,
                                  author=author,
                                  start_ts=start_ts,
                                  date=date_str)
                    seg.save()

                    self.__writer.addDocument(lucene_doc)

        self.__writer.commit()