web/blinkster/ldt/contentindexer.py
author ymh <ymh.work@gmail.com>
Tue, 25 May 2010 02:43:45 +0200
changeset 29 cc9b7e14412b
parent 16 146d86480e85
child 35 8b65c9054eac
permissions -rw-r--r--
update django and lucene
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
29
cc9b7e14412b update django and lucene
ymh <ymh.work@gmail.com>
parents: 16
diff changeset
     1
import Ft
3
526ebd3988b0 replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     2
from blinkster.utils import zipfileext
526ebd3988b0 replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     3
import blinkster.utils.log
526ebd3988b0 replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     4
import blinkster.utils.xml
526ebd3988b0 replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
     5
from blinkster import settings
16
146d86480e85 add query for details
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
     6
from blinkster.ldt.models import Content, Segment
0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
import xml.dom.minidom
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
import xml.dom.ext
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
import xml.xpath
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
import lucene
3
526ebd3988b0 replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    11
from blinkster.ldt import STORE
526ebd3988b0 replace pocketfilms occurence by blinkster
ymh <ymh.work@gmail.com>
parents: 0
diff changeset
    12
from blinkster.ldt import ANALYZER
0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
def Property(func):
    """Build a ``property`` from the locals returned by ``func``.

    ``func`` takes no argument and returns a mapping (typically
    ``locals()``) that may contain ``fget``, ``fset``, ``fdel`` and ``doc``.
    Any other key is ignored, so the decorated function may define extra
    helper locals without making ``property()`` raise a ``TypeError``.
    """
    namespace = func()
    accepted = ("fget", "fset", "fdel", "doc")
    return property(**dict((key, namespace[key]) for key in accepted if key in namespace))
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
class ContentIndexer(object):
    """Index the segments of a list of contents.

    For each content, the IRI file returned by ``content.iri_file_path()``
    is parsed.  Every ``elements/element`` node found under a
    non-blacklisted ``decoupage`` child of ``/iri/body/ensembles/ensemble``
    is added to the Lucene writer as a document and saved as a ``Segment``.
    """

    def __init__(self, contentList, writer, decoupage_blackList = settings.DECOUPAGE_BLACKLIST):
        """Store the contents to index, the Lucene writer and the blacklist.

        contentList -- iterable of content objects exposing ``iri_id`` and
                       ``iri_file_path()``
        writer -- Lucene index writer (``deleteDocuments``, ``addDocument``
                  and ``commit`` are called on it)
        decoupage_blackList -- ids of the decoupages to skip
        """
        self.__contentList = contentList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """Ids of the decoupages skipped while indexing (() when unset).""" #@UnusedVariable

        def fget(self):
            # A None blacklist is replaced by an empty tuple so that
            # membership tests in index_content never fail.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(expression, node):
        """Concatenate the data of every text node matched by ``expression`` under ``node``."""
        return "".join([txt.data for txt in xml.xpath.Evaluate(expression, node)])

    @staticmethod
    def _collect_tags(elementNode):
        """Return the tags of ``elementNode`` as a single string.

        The ``tags`` attribute is used when it is not empty, with commas
        turned into semicolons.  Otherwise the text of the ``tag`` children,
        then of the ``tags/tag`` descendants, is used, each value being
        prefixed by " ; ".  An empty string is returned when nothing is found.
        """
        tags = elementNode.getAttributeNS(None,"tags")
        if tags:
            # str.replace returns a new string: the result must be kept.
            return tags.replace(",", ";")
        for expression in ("tag/text()", "tags/tag/text()"):
            tags = "".join([" ; " + tagnode.data for tagnode in xml.xpath.Evaluate(expression, elementNode)])
            if tags:
                return tags
        return ""

    def index_all(self):
        """Index every content given to the constructor, in order."""
        for content in self.__contentList:
            self.index_content(content)

    def index_content(self, content):
        """Replace the Lucene documents of ``content`` and save its segments.

        Documents whose ``iri_id`` term equals ``content.iri_id`` are deleted
        from the writer first; the writer is committed once all elements
        have been added.

        Raises ValueError when the ``begin`` or ``dur`` attribute of an
        element is not an integer (``int()`` is applied to them directly).
        """
        blinkster.utils.log.debug("Indexing content : "+str(content.iri_id))
        filepath = content.iri_file_path()
        iri_doc = xml.dom.minidom.parse(filepath)
        iri_doc = Ft.Xml.Domlette.ConvertDocument(iri_doc)

        self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
        # NOTE(review): only the Lucene documents are removed here; Segment
        # rows saved by a previous run are not deleted in this method, so
        # re-indexing presumably stores them again -- confirm.

        con = xml.xpath.Context.Context(iri_doc, 1, 1, None)
        ensembles = xml.xpath.Evaluate("/iri/body/ensembles/ensemble", context=con)

        for ensemble in ensembles:
            ensembleId = ensemble.getAttributeNS(None,"id")

            for decoupageNode in ensemble.childNodes:
                blinkster.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
                # Only non-blacklisted <decoupage> elements are indexed.
                if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE or decoupageNode.tagName != "decoupage"  or decoupageNode.getAttributeNS(None,"id") in self.decoupage_blacklist:
                    continue

                decoupId = decoupageNode.getAttributeNS(None,"id")
                for elementNode in xml.xpath.Evaluate("elements/element", decoupageNode):
                    elementId = elementNode.getAttributeNS(None,"id")
                    author = elementNode.getAttributeNS(None,"author")
                    start_ts = int(elementNode.getAttributeNS(None,"begin"))
                    duration = int(elementNode.getAttributeNS(None,"dur"))
                    date_str = elementNode.getAttributeNS(None,"date")

                    tags = self._collect_tags(elementNode)
                    title = self._joined_text("title/text()", elementNode)
                    abstract = self._joined_text("abstract/text()", elementNode)

                    # Ids are stored (only iri_id is indexed, not analyzed);
                    # the text fields are analyzed but not stored.
                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("iri_id", content.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    seg = Segment(content = content,
                                  iri_id = content.iri_id,
                                  ensemble_id = ensembleId,
                                  cutting_id = decoupId,
                                  element_id = elementId,
                                  tags =  tags,
                                  title = title,
                                  abstract = abstract,
                                  duration = duration,
                                  author = author,
                                  start_ts = start_ts,
                                  date = date_str)
                    seg.save()

                    self.__writer.addDocument(lucene_doc)

        self.__writer.commit()
0
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
            
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
            
0d40e90630ef Blinkster creation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
class ProjectIndexer(object):
    """Index the segments of a list of projects.

    For each project, the XML held in ``project.ldt`` is parsed.  Every
    ``elements/element`` node found under a non-blacklisted ``decoupage``
    child of ``/iri/annotations/content`` is added to the Lucene writer as a
    document and, when possible, saved as a ``Segment``.
    """

    def __init__(self, projectList, writer, decoupage_blackList = settings.DECOUPAGE_BLACKLIST):
        """Store the projects to index, the Lucene writer and the blacklist.

        projectList -- iterable of project objects exposing ``iri_id``,
                       ``ldt_id`` and ``ldt``
        writer -- Lucene index writer (``deleteDocuments``, ``addDocument``
                  and ``commit`` are called on it)
        decoupage_blackList -- ids of the decoupages to skip
        """
        self.__projectList = projectList
        self.__decoupage_blacklist = decoupage_blackList
        self.__writer = writer

    @Property
    def decoupage_blacklist(): #@NoSelf
        doc = """Ids of the decoupages skipped while indexing (() when unset).""" #@UnusedVariable

        def fget(self):
            # A None blacklist is replaced by an empty tuple so that
            # membership tests in index_project never fail.
            if self.__decoupage_blacklist is None:
                self.__decoupage_blacklist = ()
            return self.__decoupage_blacklist

        def fset(self, value):
            self.__decoupage_blacklist = value

        def fdel(self):
            del self.__decoupage_blacklist

        return locals()

    @staticmethod
    def _joined_text(expression, node):
        """Concatenate the data of every text node matched by ``expression`` under ``node``."""
        return "".join([txt.data for txt in xml.xpath.Evaluate(expression, node)])

    @staticmethod
    def _collect_tags(elementNode):
        """Return the tags of ``elementNode`` as a single string.

        The ``tags`` attribute is used when it is not empty, with commas
        turned into semicolons.  Otherwise the text of the ``tag`` children,
        then of the ``tags/tag`` descendants, is used, each value being
        prefixed by " ; ".  An empty string is returned when nothing is found.
        """
        tags = elementNode.getAttributeNS(None,"tags")
        if tags:
            # str.replace returns a new string: the result must be kept.
            return tags.replace(",", ";")
        for expression in ("tag/text()", "tags/tag/text()"):
            tags = "".join([" ; " + tagnode.data for tagnode in xml.xpath.Evaluate(expression, elementNode)])
            if tags:
                return tags
        return ""

    def index_all(self):
        """Index every project given to the constructor, in order."""
        for project in self.__projectList:
            self.index_project(project)

    def index_project(self, project):
        """Index the elements of ``project`` and save them as segments.

        A failure while looking up the ``Content`` or saving the ``Segment``
        is logged and does not prevent the Lucene document from being added.
        The writer is committed once all elements have been processed.

        Raises ValueError when the ``begin`` or ``dur`` attribute of an
        element is not an integer (``int()`` is applied to them directly).
        """
        import sys  # only needed to report the exception caught below

        blinkster.utils.log.debug("Indexing project : "+str(project.iri_id))
        ldt_doc = xml.dom.minidom.parseString(project.ldt)
        ldt_doc = Ft.Xml.Domlette.ConvertDocument(ldt_doc)

        # NOTE(review): the documents added below store project.iri_id under
        # "project_id" and the content id under "iri_id", so this term may
        # not match the documents previously written for this project --
        # confirm which field should be used here.
        self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))

        con = xml.xpath.Context.Context(ldt_doc, 1, 1, None)
        contentNodes = xml.xpath.Evaluate("/iri/annotations/content", context=con)

        for contentNode in contentNodes:
            contentId = contentNode.getAttributeNS(None,"id")

            # Every project annotation is filed under this fixed ensemble id.
            ensembleId = "ens_perso"

            for decoupageNode in contentNode.childNodes:
                blinkster.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
                # Only non-blacklisted <decoupage> elements are indexed.
                if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE or decoupageNode.tagName != "decoupage"  or decoupageNode.getAttributeNS(None,"id") in self.decoupage_blacklist:
                    continue

                decoupId = decoupageNode.getAttributeNS(None,"id")
                for elementNode in xml.xpath.Evaluate("elements/element", decoupageNode):
                    elementId = elementNode.getAttributeNS(None,"id")
                    author = elementNode.getAttributeNS(None,"author")
                    start_ts = int(elementNode.getAttributeNS(None,"begin"))
                    duration = int(elementNode.getAttributeNS(None,"dur"))
                    date_str = elementNode.getAttributeNS(None,"date")

                    tags = self._collect_tags(elementNode)
                    title = self._joined_text("title/text()", elementNode)
                    abstract = self._joined_text("abstract/text()", elementNode)

                    # Ids are stored (project_id and iri_id are also indexed,
                    # not analyzed); the text fields are analyzed, not stored.
                    lucene_doc = lucene.Document()
                    lucene_doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
                    lucene_doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
                    lucene_doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
                    lucene_doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))

                    try:
                        # A distinct name keeps the DOM node being iterated
                        # (contentNode) separate from the database object.
                        content_obj = Content.objects.get(iri_id = contentId)
                        seg = Segment( project_obj = project,
                                  content = content_obj,
                                  project_id = project.ldt_id,
                                  iri_id = contentId,
                                  ensemble_id = ensembleId,
                                  cutting_id = decoupId,
                                  element_id = elementId,
                                  tags =  tags,
                                  title = title,
                                  abstract = abstract,
                                  duration = duration,
                                  author = author,
                                  start_ts = start_ts,
                                  date = date_str)
                        seg.save()
                    except Exception:
                        # Best effort: the segment is skipped, but the cause
                        # is logged.  KeyboardInterrupt and SystemExit are
                        # no longer swallowed.
                        blinkster.utils.log.error("unable to store segment " + repr(elementId) + " of content " + repr(contentId) + " : " + repr(sys.exc_info()[1]))

                    self.__writer.addDocument(lucene_doc)

        # commit() is used, as in ContentIndexer: flush() alone does not make
        # the added documents visible to index readers.
        self.__writer.commit()