web/ldt/ldt_utils/contentindexer.py
changeset 91 9c83809fda01
parent 62 39b2dab4f939
child 94 9927a619d2b5
equal deleted inserted replaced
80:1e7732f40eee 91:9c83809fda01
     2 import os
     2 import os
     3 import os.path
     3 import os.path
     4 import shutil
     4 import shutil
     5 from ldt.utils import zipfileext
     5 from ldt.utils import zipfileext
     6 import urllib
     6 import urllib
       
     7 # import ldt.utils.log
     7 import ldt.utils.xml
     8 import ldt.utils.xml
     8 from django.conf import settings
     9 from django.conf import settings
     9 from models import Content
    10 from models import Content
    10 import xml
    11 import xml
    11 import xml.dom
    12 import xml.dom
    17 import uuid
    18 import uuid
    18 import shutil
    19 import shutil
    19 import lucene
    20 import lucene
    20 from ldt.ldt_utils import STORE
    21 from ldt.ldt_utils import STORE
    21 from ldt.ldt_utils import ANALYZER
    22 from ldt.ldt_utils import ANALYZER
       
    23 ## import lxml.etree
    22 
    24 
    23 def Property(func):
    25 def Property(func):
    24     return property(**func()) 
    26     return property(**func()) 
    25 
    27 
    26 
    28 
    55         def index_content(self, content):
    57         def index_content(self, content):
    56             url =content.iri_url()
    58             url =content.iri_url()
    57             filepath = urllib.urlopen(url)
    59             filepath = urllib.urlopen(url)
    58             doc = xml.dom.minidom.parse(filepath)
    60             doc = xml.dom.minidom.parse(filepath)
    59             doc = Ft.Xml.Domlette.ConvertDocument(doc)
    61             doc = Ft.Xml.Domlette.ConvertDocument(doc)
    60                                    
    62             
    61             self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
    63             self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
    62             
    64             
    63             con = xml.xpath.Context.Context(doc, 1, 1, None)
    65             con = xml.xpath.Context.Context(doc, 1, 1, None)
    64             res = xml.xpath.Evaluate("/iri/body/ensembles/ensemble", context=con)
    66             res = xml.xpath.Evaluate("/iri/body/ensembles/ensemble", context=con)
    65 
    67 
    66             for ensemble in res:
    68             for ensemble in res:
    67                 ensembleId = ensemble.getAttributeNS(None,u"id")
    69                 ensembleId = ensemble.getAttributeNS(None,u"id")
    68                 
    70                 
    69                 for decoupageNode in ensemble.childNodes:
    71                 for decoupageNode in ensemble.childNodes:
    70                     if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE or decoupageNode.tagName != "decoupage"  or decoupageNode.getAttributeNS(None,u"id") in self.decoupage_blacklist:
    72                     if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE or decoupageNode.tagName != "decoupage"  or decoupageNode.getAttributeNS(None,u"id") in self.decoupage_blacklist:
       
    73 
    71                         continue
    74                         continue
    72                     
    75                     
    73                     decoupId = decoupageNode.getAttributeNS(None,u"id")
    76                     decoupId = decoupageNode.getAttributeNS(None,u"id")
    74                     res = xml.xpath.Evaluate("elements/element", decoupageNode)
    77                     res = xml.xpath.Evaluate("elements/element", decoupageNode)
    75                     for elementNode in res:
    78                     for elementNode in res:
    76                         doc = lucene.Document()
    79                         doc = lucene.Document()
    77                         elementId = elementNode.getAttributeNS(None,u"id")
    80                         elementId = elementNode.getAttributeNS(None,u"id")
    78                         tags = elementNode.getAttributeNS(None,u"tags")
    81                         tags = elementNode.getAttributeNS(None,u"tags")
    79                         
    82                         
    80                         if tags is not None:                            
    83                         if tags is not None:                            
    81                             tags.replace(";", ",")
    84                             tags.replace(",", ";")
    82                         
    85                         
    83                         if tags is None or len(tags) == 0:
    86                         if tags is None or len(tags) == 0:
    84                             tags = ""
    87                             tags = ""
    85                             restagnode = xml.xpath.Evaluate("tag/text()", elementNode)
    88                             restagnode = xml.xpath.Evaluate("tag/text()", elementNode)
    86                             for tagnode in restagnode:
    89                             for tagnode in restagnode:
    87                                 tags = tags + " , " + tagnode.data
    90                                 tags = tags + " ; " + tagnode.data
    88                                 
    91                                 
    89                         if tags is None or len(tags) == 0:
    92                         if tags is None or len(tags) == 0:
    90                             tags = ""
    93                             tags = ""
    91                             restagnode = xml.xpath.Evaluate("tags/tag/text()", elementNode)
    94                             restagnode = xml.xpath.Evaluate("tags/tag/text()", elementNode)
    92                             for tagnode in restagnode:
    95 
    93                                 tags = tags + " , " + tagnode.data                            
    96                             for tagnode in restagnode:
       
    97                                 tags = tags + " ; " + tagnode.data                            
    94     
    98     
    95                         title = ""
    99                         title = ""
    96                         for txtRes in xml.xpath.Evaluate("title/text()", elementNode): 
   100                         for txtRes in xml.xpath.Evaluate("title/text()", elementNode): 
    97                             title = title + txtRes.data 
   101                             title = title + txtRes.data 
    98                 
   102                 
   157             for project in self.__projectList:
   161             for project in self.__projectList:
   158                 self.index_project(project)
   162                 self.index_project(project)
   159  
   163  
   160         def index_project(self, project):
   164         def index_project(self, project):
   161             
   165             
       
   166             # pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
   162             doc = xml.dom.minidom.parseString(project.ldt)
   167             doc = xml.dom.minidom.parseString(project.ldt)
   163             doc = Ft.Xml.Domlette.ConvertDocument(doc) 
   168             doc = Ft.Xml.Domlette.ConvertDocument(doc) 
   164 
   169 
   165             self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))
   170             self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))
   166             
   171             
   167             con = xml.xpath.Context.Context(doc, 1, 1, None)
   172             con = xml.xpath.Context.Context(doc, 1, 1, None)
   168             res = xml.xpath.Evaluate("/iri/annotations/content", context=con)
   173             res = xml.xpath.Evaluate("/iri/annotations/content", context=con)
   169 
   174 
   170             for content in res:
   175             for content in res:
   171                 contentId = content.getAttributeNS(None,u"id")
   176                 contentId = content.getAttributeNS(None,u"id")
   172                 
   177  
   173                 ensembleId = "ens_perso"
   178                 ensembleId = "ens_perso"
   174                 
   179                 
   175                 for decoupageNode in content.childNodes:
   180                 for decoupageNode in content.childNodes:
       
   181                     # pocketfilms.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
   176                     if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE or decoupageNode.tagName != "decoupage"  or decoupageNode.getAttributeNS(None,"id") in self.decoupage_blacklist:
   182                     if decoupageNode.nodeType != xml.dom.Node.ELEMENT_NODE or decoupageNode.tagName != "decoupage"  or decoupageNode.getAttributeNS(None,"id") in self.decoupage_blacklist:
   177                         continue
   183 			continue
   178                     
   184                     
   179                     decoupId = decoupageNode.getAttributeNS(None,u"id")
   185                     decoupId = decoupageNode.getAttributeNS(None,u"id")
   180                     res = xml.xpath.Evaluate("elements/element", decoupageNode)
   186                     res = xml.xpath.Evaluate("elements/element", decoupageNode)
   181                     for elementNode in res:
   187                     for elementNode in res:
   182                         doc = lucene.Document()
   188                         doc = lucene.Document()
   183                         elementId = elementNode.getAttributeNS(None,u"id")
   189                         elementId = elementNode.getAttributeNS(None,u"id")
   184                         tags = elementNode.getAttributeNS(None,u"tags")
   190                         tags = elementNode.getAttributeNS(None,u"tags")
   185                         
   191                         
   186                         if tags is not None:                            
   192                         if tags is not None:                            
   187                             tags.replace(";", ",")
   193                             tags.replace(",", ";")
   188                         
   194                         
   189                         if tags is None or len(tags) == 0:
   195                         if tags is None or len(tags) == 0:
   190                             tags = ""
   196                             tags = ""
   191                             restagnode = xml.xpath.Evaluate("tag/text()", elementNode)
   197                             restagnode = xml.xpath.Evaluate("tag/text()", elementNode)
   192                             for tagnode in restagnode:
   198                             for tagnode in restagnode:
   193                                 tags = tags + " , " + tagnode.data
   199                                 tags = tags + " ; " + tagnode.data
   194                                 
   200                                 
   195                         if tags is None or len(tags) == 0:
   201                         if tags is None or len(tags) == 0:
   196                             tags = ""
   202                             tags = ""
   197                             restagnode = xml.xpath.Evaluate("tags/tag/text()", elementNode)
   203                             restagnode = xml.xpath.Evaluate("tags/tag/text()", elementNode)
   198                             for tagnode in restagnode:
   204                             for tagnode in restagnode:
   199                                 tags = tags + " , " + tagnode.data                            
   205                                 tags = tags + " ; " + tagnode.data                            
   200     
   206     
   201                         title = ""
   207                         title = ""
   202                         for txtRes in xml.xpath.Evaluate("title/text()", elementNode): 
   208                         for txtRes in xml.xpath.Evaluate("title/text()", elementNode): 
   203                             title = title + txtRes.data 
   209                             title = title + txtRes.data 
   204                 
   210