--- a/src/ldt/ldt/__init__.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/__init__.py Sun May 01 03:30:40 2011 +0200
@@ -14,3 +14,6 @@
__version__ = get_version()
+
+# Importing contentindexer registers its post_save handler for Project.
+from ldt.ldt_utils import contentindexer
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/__init__.py Sun May 01 03:30:40 2011 +0200
@@ -0,0 +1,34 @@
+"""Shared Lucene configuration for the ldt applications.
+
+Importing this module starts the Lucene JVM, builds the index directory
+object for ``settings.INDEX_PATH`` (``STORE``) and the per-field analyzer
+(``ANALYZER``).
+"""
+from django.conf import settings
+import lucene
+
+lucene.initVM(lucene.CLASSPATH)
+
+STORE = lucene.SimpleFSDirectory(lucene.File(settings.INDEX_PATH))
+# Standard analyzer by default; the text fields below use the French analyzer.
+ANALYZER = lucene.PerFieldAnalyzerWrapper(lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT))
+ANALYZER.addAnalyzer("tags", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
+ANALYZER.addAnalyzer("title", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
+ANALYZER.addAnalyzer("abstract", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
+ANALYZER.addAnalyzer("all", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
+
+def get_writer(new=False):
+    """Return an ``IndexWriter`` on ``STORE`` using ``ANALYZER``.
+
+    new -- True discards any existing index and starts an empty one; False
+    (the default) keeps the documents already indexed and creates the index
+    only when it does not exist yet.
+
+    The caller is responsible for closing the returned writer.
+    """
+    max_length = lucene.IndexWriter.MaxFieldLength.UNLIMITED
+    if new:
+        return lucene.IndexWriter(STORE, ANALYZER, True, max_length)
+    # Without the boolean, Lucene appends to the index or creates it if absent.
+    return lucene.IndexWriter(STORE, ANALYZER, max_length)
+
--- a/src/ldt/ldt/ldt_utils/__init__.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/__init__.py Sun May 01 03:30:40 2011 +0200
@@ -1,15 +1,2 @@
-import lucene
-from django.conf import settings
-
-lucene.initVM(lucene.CLASSPATH)
-
-STORE = lucene.SimpleFSDirectory(lucene.File(settings.INDEX_PATH))
-ANALYZER = lucene.PerFieldAnalyzerWrapper(lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("tags", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("title", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("abstract", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("all", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-
-
VERSION = (0, 1)
VERSION_STR = unicode(".".join(map(lambda i:"%02d" % (i,), VERSION)))
--- a/src/ldt/ldt/ldt_utils/admin.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/admin.py Sun May 01 03:30:40 2011 +0200
@@ -2,11 +2,11 @@
from django.contrib import admin
from django.shortcuts import render_to_response
from django.template import RequestContext
-from ldt.ldt_utils import STORE, ANALYZER
-from ldt.ldt_utils.contentindexer import ContentIndexer
+from ldt.ldt_utils.contentindexer import ContentIndexer, ProjectIndexer
from ldt.ldt_utils.fileimport import FileImport, FileImportError
from ldt.ldt_utils.forms import LdtImportForm, ReindexForm
from ldt.ldt_utils.models import Content, Project, Media, Author
+import ldt.indexation
import lucene
@@ -48,10 +48,16 @@
form = ReindexForm(request.POST)
if form.is_valid():
# try:
- writer = lucene.IndexWriter(STORE, ANALYZER, True, lucene.IndexWriter.MaxFieldLength.UNLIMITED)
+ writer = ldt.indexation.get_writer()
contentList = form.cleaned_data["contents"]
indexer = ContentIndexer(contentList, writer)
indexer.index_all()
+
+ index_projects = form.cleaned_data["index_projects"]
+ if index_projects:
+ projectList = Project.objects.filter(contents__in=contentList).distinct() #filter(contents__in=contentList) @UndefinedVariable
+ indexer = ProjectIndexer(projectList, writer)
+ indexer.index_all()
writer.close()
message = "Indexation ok : " + repr(form.cleaned_data["contents"])
--- a/src/ldt/ldt/ldt_utils/contentindexer.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/contentindexer.py Sun May 01 03:30:40 2011 +0200
@@ -1,11 +1,15 @@
from django.conf import settings
-from ldt.ldt_utils.models import Segment
+from django.db.models.signals import post_save
+from django.dispatch import receiver
+from ldt.ldt_utils.models import Segment, Content, Project
from ldt.ldt_utils.utils import reduce_text_node
+import ldt.indexation
import lucene
import lxml.etree
import urllib #@UnresolvedImport
# import ldt.utils.log
+
def Property(func):
return property(**func())
@@ -44,6 +48,7 @@
doc = lxml.etree.parse(filepath) #@UndefinedVariable
self.__writer.deleteDocuments(lucene.Term("iri_id", content.iri_id))
+ Segment.objects.filter(iri_id=content.iri_id).delete() #@UndefinedVariable
res = doc.xpath("/iri/body/ensembles/ensemble")
@@ -149,80 +154,98 @@
# pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
doc = lxml.etree.fromstring(project.ldt) #@UndefinedVariable
- self.__writer.deleteDocuments(lucene.Term("iri_id", project.iri_id))
+ self.__writer.deleteDocuments(lucene.Term("project_id", project.ldt_id))
+ Segment.objects.filter(project_obj__ldt_id=project.ldt_id).delete()
res = doc.xpath("/iri/annotations/content")
for content in res:
contentId = content.get(u"id", None)
-
- ensembleId = "ens_perso"
+ content_obj = None
- for decoupageNode in content.getchildren():
- # pocketfilms.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
- if decoupageNode.tag != "decoupage" or decoupageNode.get(u"id", None) in self.decoupage_blacklist:
- continue
-
- decoupId = decoupageNode.get(u"id", None)
- res = decoupageNode.xpath("elements/element")
- for elementNode in res:
- doc = lucene.Document()
- elementId = elementNode.get(u"id", None)
- tags = elementNode.get(u"tags", None)
-
- if tags is not None:
- tags.replace(",", ";")
-
- if tags is None or len(tags) == 0:
- tags = ""
- restagnode = elementNode.xpath("tag/text()")
- for tagnode in restagnode:
- tags = tags + " ; " + tagnode.text()
-
- if tags is None or len(tags) == 0:
- tags = ""
- restagnode = elementNode.xpath("tags/tag/text()")
- for tagnode in restagnode:
- tags = tags + " ; " + tagnode.text()
-
- title = reduce_text_node("")
- for txtRes in elementNode.xpath("title/text()"):
- title = title + txtRes.text()
+ clist = Content.objects.filter(iri_id = contentId)
+ if len(clist) > 0:
+ content_obj = clist[0]
+
+ for ensembleNode in content.getchildren():
+ ensembleId = ensembleNode.get(u"id",None)
- abstract = ""
- for txtRes in elementNode.xpath("abstract/text()"):
- abstract = abstract + txtRes.text()
-
- author = elementNode.get("author", "")
- start_ts = int(elementNode.get("begin", "-1"))
- duration = int(elementNode.get("dur", "-1"))
- date_str = elementNode.get("date", "")
-
-
- doc.add(lucene.Field("project_id", project.iri_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
- doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
- doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
- doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
- doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
- doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
- doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
- doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
- doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
+ for decoupageNode in ensembleNode.getchildren():
+ # pocketfilms.utils.log.debug("Indexing content decoupage : "+ repr(decoupageNode.nodeType) + " in " + repr(self.decoupage_blacklist))
+ if decoupageNode.tag != "decoupage" or decoupageNode.get(u"id", None) in self.decoupage_blacklist:
+ continue
- seg = Segment(content=content,
- iri_id=content.iri_id,
- ensemble_id=ensembleId,
- cutting_id=decoupId,
- element_id=elementId,
- tags=tags,
- title=title,
- abstract=abstract,
- duration=duration,
- author=author,
- start_ts=start_ts,
- date=date_str)
- seg.save()
+ decoupId = decoupageNode.get(u"id", None)
+ res = decoupageNode.xpath("elements/element")
+ for elementNode in res:
+ doc = lucene.Document()
+ elementId = elementNode.get(u"id", None)
+ tags = elementNode.get(u"tags", None)
+
+ if tags is not None:
+ tags.replace(",", ";")
+
+ if tags is None or len(tags) == 0:
+ tags = u""
+ restagnode = elementNode.xpath("tag/text()", smart_strings=False)
+ for tagnode in restagnode:
+ tags = tags + u" ; " + tagnode
- self.__writer.addDocument(doc)
+ if tags is None or len(tags) == 0:
+ tags = u""
+ restagnode = elementNode.xpath("tags/tag/text()", smart_strings=False)
+
+ for tagnode in restagnode:
+ tags = tags + u" ; " + tagnode
+
+ title = reduce_text_node(elementNode, "title/text()")
+ abstract = reduce_text_node(elementNode, "abstract/text()")
+
+ author = elementNode.get("author", "")
+ start_ts = int(elementNode.get("begin", "-1"))
+ duration = int(elementNode.get("dur", "-1"))
+ date_str = elementNode.get("date", "")
+
+
+ doc.add(lucene.Field("type_doc", "annotation", lucene.Field.Store.NO, lucene.Field.Index.NOT_ANALYZED))
+ doc.add(lucene.Field("project_id", project.ldt_id, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
+ doc.add(lucene.Field("iri_id", contentId, lucene.Field.Store.YES, lucene.Field.Index.NOT_ANALYZED))
+ doc.add(lucene.Field("ensemble_id", ensembleId, lucene.Field.Store.YES, lucene.Field.Index.NO))
+ doc.add(lucene.Field("decoupage_id", decoupId, lucene.Field.Store.YES, lucene.Field.Index.NO))
+ doc.add(lucene.Field("element_id", elementId, lucene.Field.Store.YES, lucene.Field.Index.NO))
+ doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
+ doc.add(lucene.Field("title", title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
+ doc.add(lucene.Field("abstract", abstract, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
+ doc.add(lucene.Field("all", " ".join([tags, title, abstract]), lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
+
+ seg = Segment(content=content_obj,
+ iri_id=contentId,
+ ensemble_id=ensembleId,
+ cutting_id=decoupId,
+ element_id=elementId,
+ tags=tags,
+ title=title,
+ abstract=abstract,
+ duration=duration,
+ author=author,
+ start_ts=start_ts,
+ date=date_str,
+ project_obj = project)
+ seg.save()
+
+ self.__writer.addDocument(doc)
self.__writer.commit()
+
+@receiver(post_save, sender=Project)
+def index_project(sender, **kwargs):
+    """Keep the Lucene index and Segment rows in sync after a Project save.
+
+    Connected to ``post_save`` for ``Project``. A project whose ``state`` is
+    2 is (re)indexed through ``ProjectIndexer``; for any other state its
+    index documents and ``Segment`` rows are removed.
+    """
+    # NOTE(review): 2 is presumably the "published" state - confirm against the Project model.
+    instance = kwargs['instance']
+    # Same thread attachment the ldt.text models perform before using Lucene.
+    lucene.getVMEnv().attachCurrentThread()
+    writer = ldt.indexation.get_writer()
+    try:
+        if instance.state != 2:
+            writer.deleteDocuments(lucene.Term("project_id", instance.ldt_id))
+            Segment.objects.filter(project_obj__ldt_id=instance.ldt_id).delete()
+        else:
+            ProjectIndexer([instance], writer).index_all()
+    finally:
+        # Always close: this flushes the pending deletes and releases the
+        # index write lock so the next writer can be opened.
+        writer.close()
+
+
--- a/src/ldt/ldt/ldt_utils/forms.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/forms.py Sun May 01 03:30:40 2011 +0200
@@ -11,7 +11,7 @@
class LdtAddForm(forms.ModelForm):
title = forms.CharField()
- # contents = forms.ModelMultipleChoiceField(Content.objects.all())
+ contents = forms.ModelMultipleChoiceField(Content.objects.all())
# owner = forms.ModelChoiceField(Author.objects.all())
class Meta:
model = Project
@@ -19,6 +19,7 @@
class ReindexForm(forms.Form):
contents = forms.ModelMultipleChoiceField(Content.objects.all())
+ index_projects = forms.BooleanField(required=False, initial=False) # required=False: an unticked box must still validate
class SearchForm(forms.Form):
search = forms.CharField()
--- a/src/ldt/ldt/ldt_utils/models.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/models.py Sun May 01 03:30:40 2011 +0200
@@ -3,7 +3,7 @@
from django.db import models
from django.utils.translation import ugettext_lazy as _
from ldt.core.models import Document, Owner
-from ldt.ldt_utils import STORE, ANALYZER
+import ldt.indexation
from utils import (create_ldt, copy_ldt, create_empty_iri, update_iri,
generate_uuid)
import lucene
@@ -131,7 +131,7 @@
def delete(self):
super(Content, self).delete()
- writer = lucene.IndexWriter(STORE, ANALYZER, True, lucene.IndexWriter.MaxFieldLength.UNLIMITED)
+ writer = ldt.indexation.get_writer()
writer.deleteDocuments(lucene.Term("iri_id", self.iri_id))
writer.commit()
@@ -161,7 +161,7 @@
#TODO: better manage the change in .iri name and error scenario (save in temp file + rename
def save(self, *args, **kwargs):
- self.sync_iri_file()
+ #self.sync_iri_file()
# update it
super(Content, self).save(*args, **kwargs)
--- a/src/ldt/ldt/ldt_utils/templates/admin/ldt_utils/app_action.html Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/templates/admin/ldt_utils/app_action.html Sun May 01 03:30:40 2011 +0200
@@ -1,6 +1,6 @@
{% extends "admin/base_site.html" %}
{% load i18n %}
{% block breadcrumbs %}
-<div class="breadcrumbs"><a href="{% url admin:index %}"> {% trans "Home" %}</a> › <a href="{% url admin:app_list 'ldt' %}">ldt</a> › {{ current_action }}</div>
+<div class="breadcrumbs"><a href="{% url admin:index %}"> {% trans "Home" %}</a> › <a href="{% url admin:app_list 'ldt_utils' %}">ldt_utils</a> › {{ current_action }}</div>
{% endblock %}
--- a/src/ldt/ldt/ldt_utils/utils.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/utils.py Sun May 01 03:30:40 2011 +0200
@@ -1,5 +1,5 @@
from django.conf import settings
-from ldt.ldt_utils import STORE
+from ldt.indexation import STORE
import datetime
import django.core.urlresolvers
import lucene
--- a/src/ldt/ldt/ldt_utils/views.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/ldt_utils/views.py Sun May 01 03:30:40 2011 +0200
@@ -269,7 +269,6 @@
Project.create_project(title=form.cleaned_data['title'], user=user, contents=form.cleaned_data['contents'])
form_status = "saved"
contents = []
- #return HttpResponseRedirect(reverse("ldt.ldt_utils.views.list_ldt"))
else:
form = LdtAddForm()
contents = Content.objects.all() #@UndefinedVariable
--- a/src/ldt/ldt/text/__init__.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/text/__init__.py Sun May 01 03:30:40 2011 +0200
@@ -1,16 +1,2 @@
-import lucene
-from django.conf import settings
-
-lucene.initVM(lucene.CLASSPATH)
-
-STORE = lucene.SimpleFSDirectory(lucene.File(settings.INDEX_PATH))
-ANALYZER = lucene.PerFieldAnalyzerWrapper(lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("tags", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("title", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("abstract", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("all", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("type_doc", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-
-
VERSION = (1, 0)
-VERSION_STR = unicode(".".join(map(lambda i:"%01d" % (i,), VERSION)))
+VERSION_STR = unicode(".".join(map(lambda i:"%01d" % (i,), VERSION)))
\ No newline at end of file
--- a/src/ldt/ldt/text/annotindexer.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/text/annotindexer.py Sun May 01 03:30:40 2011 +0200
@@ -28,7 +28,7 @@
for tag in annottags:
tags += tag + ";"
- doc.add(lucene.Field("type_doc", "text-annotation", lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
+ doc.add(lucene.Field("type_doc", "text-annotation", lucene.Field.Store.NO, lucene.Field.Index.NOT_ANALYZED))
doc.add(lucene.Field("tags", tags, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
doc.add(lucene.Field("title", annotation.title, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
doc.add(lucene.Field("abstract", annotation.description, lucene.Field.Store.NO, lucene.Field.Index.ANALYZED))
--- a/src/ldt/ldt/text/models.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/text/models.py Sun May 01 03:30:40 2011 +0200
@@ -1,9 +1,9 @@
from annotindexer import AnnotIndexer
from django.db import models
from django.utils.translation import ugettext_lazy as _
-from ldt.ldt_utils import STORE, ANALYZER
from tagging.models import Tag
from utils import generate_uuid
+import ldt.indexation
import lucene
import lxml
import tagging.fields
@@ -126,13 +126,13 @@
def delete(self):
super(Annotation, self).delete()
lucene.getVMEnv().attachCurrentThread()
- writer = lucene.IndexWriter(STORE, ANALYZER, True, lucene.IndexWriter.MaxFieldLength.UNLIMITED)
+ writer = ldt.indexation.get_writer()
writer.deleteDocuments(lucene.Term("external_id", self.external_id))
writer.close()
def index_annot(self):
lucene.getVMEnv().attachCurrentThread()
- writer = lucene.IndexWriter(STORE, ANALYZER, True, lucene.IndexWriter.MaxFieldLength.UNLIMITED)
+ writer = ldt.indexation.get_writer()
annotl = [self, ]
indexer = AnnotIndexer(annotl, writer)
indexer.index_all()
@@ -140,7 +140,7 @@
def update_index(self):
lucene.getVMEnv().attachCurrentThread()
- writer = lucene.IndexWriter(STORE, ANALYZER, True, lucene.IndexWriter.MaxFieldLength.UNLIMITED)
+ writer = ldt.indexation.get_writer()
writer.deleteDocuments(lucene.Term("external_id", self.external_id))
writer.close()
self.index_annot()
--- a/src/ldt/ldt/text/utils.py Sun May 01 03:10:32 2011 +0200
+++ b/src/ldt/ldt/text/utils.py Sun May 01 03:30:40 2011 +0200
@@ -1,5 +1,5 @@
from django.conf import settings
-from ldt.ldt_utils import STORE
+from ldt.indexation import STORE
import lucene
import uuid