--- a/web/ldt/ldt_utils/projectindexer.py Sun Nov 14 20:25:22 2010 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,108 +0,0 @@
-import tempfile
-import os
-import os.path
-import shutil
-import ldt.utils.xml
-from ldt import settings
-import lucene
-from ldt.ldt_utils import STORE
-from ldt.ldt_utils import ANALYZER
-import lxml.etree
-
-def Property(func):
- return property(**func())
-
-class ProjectIndexer(object):
- def __init__(self, projectList, writer, decoupage_blackList = settings.DECOUPAGE_BLACKLIST):
- self.__projectList = projectList
- self.__decoupage_blacklist = decoupage_blackList
- self.__writer = writer
-
- @Property
- def decoupage_blacklist(): #@NoSelf
- doc = """get blacklist""" #@UnusedVariable
-
- def fget(self):
- if self.__decoupage_blacklist is None:
- self.__decoupage_blacklist = ()
- return self.__decoupage_blacklist
-
- def fset(self, value):
- self.__decoupage_blacklist = value
-
- def fdel(self):
- del self.__decoupage_blacklist
-
- return locals()
-
- def index_all(self):
- for project in self.__projectList:
- self.index_project(project)
-
- def index_project(self, project):
- # ldt.utils.log.debug("Indexing project : "+str(project.ldt_id))
-
- ldt=project.ldt
- doc = lxml.etree.fromstring(ldt.encode( "utf-8" ))
-
- self.__writer.deleteDocuments(lucene.Term("ldt_id", project.ldt_id))
-
- res = doc.xpath("/iri/annotations/content")
- project.ldt.encode( "utf-8 " )
-
- for content in res:
- contentId = content.get("id")
-
- res =content.xpath("ensemble")
- for ensemble in res:
- ensembleId = ensemble.get("id")
-
- for decoupageNode in ensemble.getchildren():
- # ldt.utils.log.debug("Indexing project decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
- if decoupageNode.tag != "decoupage" or decoupageNode.get("id") in self.decoupage_blacklist:
- continue
-
- decoupId = decoupageNode.get("id")
- res = decoupageNode.xpath("elements/element")
-
- for elementNode in res:
- doc = lucene.Document()
- elementId = elementNode.get("id")
- tags = elementNode.get("tags")
-
- if tags is not None:
- tags.replace(",", ";")
-
- if tags is None or len(tags) == 0:
- tags = ""
- restagnode = elementNode.xpath("tag/text()")
- for tagnode in restagnode:
- tags = tags + " ; " + tagnode.text()
-
- if tags is None or len(tags) == 0:
- tags = ""
- restagnode = elementNode.xpath("tags/tag/text()")
- for tagnode in restagnode:
- tags = tags + " ; " + tagnode.text()
-
- title = ""
- for txtRes in elementNode.xpath("title/text()"):
- title = title + txtRes.text()
-
- abstract = ""
- for txtRes in elementNode.xpath("abstract/text()"):
- abstract = abstract + txtRes.text()
-
- doc.add(lucene.Field("ldt_id", project.ldt_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
- doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
- doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
- doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
- doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
- doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
- doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
- doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
- doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
-
- self.__writer.addDocument(doc)
-
- self.__writer.commit()