--- a/.settings/org.eclipse.core.resources.prefs Mon Jul 23 17:05:53 2012 +0200
+++ b/.settings/org.eclipse.core.resources.prefs Mon Jul 30 19:46:40 2012 +0200
@@ -1,6 +1,8 @@
eclipse.preferences.version=1
encoding//src/ldt/ldt/core/migrations/0001_initial.py=utf-8
encoding//src/ldt/ldt/core/migrations/0002_auto__del_owner.py=utf-8
+encoding//src/ldt/ldt/indexation/highlighter.py=utf-8
+encoding//src/ldt/ldt/indexation/search_indexes.py=utf-8
encoding//src/ldt/ldt/ldt_utils/migrations/0001_initial.py=utf-8
encoding//src/ldt/ldt/ldt_utils/migrations/0002_auto__add_field_media_mimetype_field__chg_field_media_external_src_url.py=utf-8
encoding//src/ldt/ldt/ldt_utils/migrations/0003_auto__chg_field_project_owner.py=utf-8
@@ -17,5 +19,6 @@
encoding//src/ldt/ldt/user/migrations/0001_initial.py=utf-8
encoding//src/ldt/ldt/user/migrations/0008_auto__chg_field_groupprofile_image__chg_field_groupprofile_group__chg_.py.old=utf-8
encoding//virtualenv/web/env/guardianenv/Lib/site-packages/guardian/migrations/0001_initial.py=utf-8
+encoding//virtualenv/web/env/venv_platform/lib/python2.7/site-packages/haystack/backends/__init__.py=utf-8
encoding//web/ldtplatform/config.py=utf-8
encoding//web/ldtplatform/settings.py=utf-8
--- a/src/ldt/ldt/indexation/__init__.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/indexation/__init__.py Mon Jul 30 19:46:40 2012 +0200
@@ -1,151 +1,85 @@
from django.conf import settings
-import lucene
-
-lucene.initVM(lucene.CLASSPATH)
-STORE = lucene.SimpleFSDirectory(lucene.File(settings.INDEX_PATH))
-ANALYZER = lucene.PerFieldAnalyzerWrapper(lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("tags", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("title", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("abstract", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
-ANALYZER.addAnalyzer("all", lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT))
+from haystack.query import SearchQuerySet
+from ldt.indexation.highlighter import LdtHighlighter as Highlighter
+from ldt.ldt_utils.models import Segment
+from ldt.text.models import Annotation
+import re
+import sys
+
def get_results_with_context(field, query, content_list=None):
- searcher = get_searcher()
-
- try:
- res = get_results_list(field, query, searcher)
- contexts = []
- content_iri_ids = None
- if content_list is not None :
- content_iri_ids = [ctt.iri_id for ctt in content_list]
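+    # Build one dict per hit from the stored index fields, adding score, the
+    # result pk (as indexation_id) and any highlighted snippets for the caller.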
+ results = get_results_list(field, query, False)
+ contexts = []
+ content_iri_ids = None
+ if content_list is not None :
+ content_iri_ids = [ctt.iri_id for ctt in content_list]
- for i in res:
- doc = searcher.doc(i.doc)
- if content_iri_ids is None or (content_iri_ids is not None and doc.get("iri_id") in content_iri_ids) :
- ids = {"iri_id":doc.get("iri_id"), "ensemble_id":doc.get("ensemble_id"), "decoupage_id":doc.get("decoupage_id"), "element_id":doc.get("element_id"), "project_id":doc.get("project_id")}
- score = i.score
- title = doc.getField('title').stringValue()
- desc = doc.getField('abstract').stringValue()
- tags = doc.getField('tags').stringValue()
- begin = doc.getField('begin').stringValue()
- duration = doc.getField('duration').stringValue()
-
- ids['context'] = desc
- ids['title'] = title
- ids['tags'] = tags
- ids['score'] = score
- ids['indexation_id'] = i.doc
- ids['begin'] = begin
- ids['duration'] = duration
- contexts.append(ids)
- finally:
- searcher.close()
- return contexts
+ for res in results:
+ doc = res.get_stored_fields()
+        if content_iri_ids is None or doc.get("iri_id") in content_iri_ids:
+ doc["score"] = res.score
+ doc["indexation_id"] = res.pk
+ doc["context"] = doc["abstract"]
+ doc["highlighted"] = res.highlighted
+ contexts.append(doc)
+ return contexts
+
+
-def get_results_list(field, query, indexSearcher=None):
- searcher_allocated = False
- if indexSearcher is None:
- indexSearcher, searcher_allocated = get_searcher(), True
- try:
- queryParser = get_query_parser(field)
- queryObj = queryParser.parse(query)
- hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)
- finally:
- if searcher_allocated:
- indexSearcher.close()
- return hits.scoreDocs
+def get_results_list(field, query, highlight=True):
+
+ if field == 'all':
+ field = 'text'
+
+ qs = SearchQuerySet().models(Segment).auto_query(query, field)
+ if highlight:
+ qs = qs.highlight()
+ return qs
+
+
def get_result_text(field, query):
- indexSearcher = lucene.IndexSearcher(STORE)
- queryParser = lucene.QueryParser(lucene.Version.LUCENE_30, field, lucene.FrenchAnalyzer(lucene.Version.LUCENE_30))
- queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
- queryObj = queryParser.parse(query)
- hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)
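+    # 'all' is mapped to the combined document field 'text'; a search on 'text'
+    # itself targets 'text_field', since 'text' is the document field on AnnotationIndex.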
+ if field == 'all':
+ field = 'text'
+ elif field == 'text':
+ field = 'text_field'
+
+    qs = SearchQuerySet().models(Annotation).auto_query(query, field)
- res = []
- for hit in hits.scoreDocs:
- doc = indexSearcher.doc(hit.doc)
- res.append({"external_id":doc.get("external_id"), "title":doc.get("title")})
- indexSearcher.close()
+    return [{'external_id': res.get_stored_fields()['annotation_id'], 'title': res.get_stored_fields()['title'], 'score': res.score} for res in qs]
- return res
-
-
def highlight_documents(results_list, query, field):
- searcher = get_searcher()
- try:
- analyzer = lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT)
- formatter = lucene.SimpleHTMLFormatter('<span class="highlight">', '</span>')
- query = get_query_parser(field).parse(query)
- highlighter = lucene.Highlighter(formatter, lucene.QueryScorer (query))
-
- for project in results_list:
- for segment in project['list']:
- lucene_doc = searcher.doc(segment.indexation_id)
- segment.context = get_highlighted_text(lucene_doc, analyzer, highlighter, 'abstract')
- tags = get_highlighted_text(lucene_doc, analyzer, highlighter, 'tags')
- segment.title = get_highlighted_text(lucene_doc, analyzer, highlighter, 'title')
-
- if segment.context == u'':
- segment.context = lucene_doc.getField('abstract').stringValue()
- if tags == u'':
- tags = lucene_doc.getField('tags').stringValue()
- if segment.title == u'':
- segment.title = lucene_doc.getField('title').stringValue()
-
- segment.context_tags = tags[tags.find(';')+1:]
- finally:
- searcher.close()
- return results_list
-def get_highlighted_text(doc, analyzer, highlighter, field):
- res = doc.getField(field).stringValue()
- ts = analyzer.tokenStream("body", lucene.StringReader(res))
- res = highlighter.getBestFragments(ts, res, settings.LDT_MAX_FRAGMENT_PER_SEARCH, "...")
- return res
-
-def get_writer(new=False):
- lucene.getVMEnv().attachCurrentThread()
- return lucene.IndexWriter(STORE, ANALYZER, new, lucene.IndexWriter.MaxFieldLength.UNLIMITED)
-
-def get_searcher():
- lucene.getVMEnv().attachCurrentThread()
- return lucene.IndexSearcher(STORE)
-
-def get_query_parser(field):
- queryParser = lucene.QueryParser(lucene.Version.LUCENE_30, field, lucene.FrenchAnalyzer(lucene.Version.LUCENE_30))
- queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
- return queryParser
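+    # Same markup as the old SimpleHTMLFormatter ('<span class="highlight">');
+    # max_length is unbounded so whole abstracts/titles/tags come back, not fragments.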
+ highlight = Highlighter(query, html_tag="span", css_class="highlight", max_length=sys.maxint)
+
+ for project in results_list:
+ for segment in project['list']:
+ if hasattr(segment, "highlighted") and segment.highlighted:
+                # TODO: map hsplit (the marker/text pairs split out of the
+                # highlighted fragment) back onto context/tags/title; the raw
+                # stored fields are used as-is until then.
+ h = segment.highlighted[0]
+ hsplit = re.split("\-\-\*([\w\-]+)\*\-\-", h, flags=re.S)
+ highlighted_text = {
+ "context" : segment.abstract,
+ "tags" : segment.tags,
+ 'title' : segment.title,
+ }
-def delete_document(field, value, writer=None):
- if writer is None:
- writer = get_writer()
- try:
- writer.deleteDocuments(lucene.Term(field, value))
- writer.commit()
- finally:
- writer.close()
-
-def add_document(values, writer=None):
-
- if writer is None:
- writer = get_writer()
-
- doc = lucene.Document()
-
- for field_val in values:
- store = lucene.Field.Store.YES if field_val[2] else lucene.Field.Store.NO
- index_analyse = {
- "NOT_ANALYZED": lucene.Field.Index.NOT_ANALYZED,
- "ANALYSED": lucene.Field.Index.ANALYZED,
- "NO": lucene.Field.Index.NO
- }[field_val[3]]
- doc.add(lucene.Field(field_val[0], field_val[1], store, index_analyse))
-
- writer.addDocument(doc)
+ else:
+ highlighted_text = {
+ "context" : highlight.highlight(segment.abstract),
+ "tags" : highlight.highlight(segment.tags),
+ 'title' : highlight.highlight(segment.title)
+ }
+
+ segment.context = highlighted_text['context']
+ segment.title = highlighted_text['title']
+ tags = highlighted_text['tags']
+ segment.context_tags = tags[tags.find(';')+1:]
+
+ return results_list
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/highlighter.py Mon Jul 30 19:46:40 2012 +0200
@@ -0,0 +1,13 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jul 30, 2012
+
+@author: ymh
+'''
+from haystack.utils import Highlighter
+
+class LdtHighlighter(Highlighter):
+
+ def render_html(self, highlight_locations=None, start_offset=None, end_offset=None):
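+        # Always render the full text block (offsets 0..len) instead of the
+        # truncated window the base Highlighter would select around the matches.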
+
+ return super(LdtHighlighter, self).render_html(highlight_locations=highlight_locations, start_offset=0, end_offset=len(self.text_block))
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/search_indexes.py Mon Jul 30 19:46:40 2012 +0200
@@ -0,0 +1,40 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jul 23, 2012
+
+@author: ymh
+'''
+
+from haystack import indexes
+from ldt.ldt_utils.models import Segment
+from ldt.text.models import Annotation
+
+class SegmentIndex(indexes.RealTimeSearchIndex, indexes.Indexable):
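+    # 'text' is the searchable document built from segment_text.txt; the id and
+    # timing fields below are stored for retrieval only (indexed=False).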
+ text = indexes.CharField(document=True, use_template=True)
+ iri_id = indexes.CharField(model_attr='iri_id', indexed=False, stored=True)
+ project_id = indexes.CharField(model_attr='project_id', indexed=False, stored=True, null=True)
+ ensemble_id = indexes.CharField(model_attr='ensemble_id', indexed=False, stored=True)
+ cutting_id = indexes.CharField(model_attr='cutting_id', indexed=False, stored=True)
+ element_id = indexes.CharField(model_attr='element_id', indexed=False, stored=True)
+ tags = indexes.CharField(model_attr='tags', stored=True)
+ title = indexes.CharField(model_attr='title', stored=True)
+ abstract = indexes.CharField(model_attr='abstract', stored=True)
+ duration = indexes.IntegerField(model_attr='duration', indexed=False, stored=True)
+ author = indexes.CharField(model_attr='author', stored=True, null=True)
+ start_ts = indexes.IntegerField(model_attr='start_ts', indexed=False, stored=True)
+ date = indexes.CharField(model_attr='date', stored=True)
+
+ def get_model(self):
+ return Segment
+
+class AnnotationIndex(indexes.RealTimeSearchIndex, indexes.Indexable):
+ text = indexes.CharField(document=True, use_template=True)
+ tags = indexes.CharField(model_attr='tags', indexed=True, stored=False)
+ title = indexes.CharField(model_attr='title', indexed=True, stored=True)
+ abstract = indexes.CharField(model_attr='description', indexed=True, stored=False)
+ text_field = indexes.CharField(model_attr='text', indexed=True, stored=False)
+    annotation_id = indexes.CharField(model_attr='external_id', indexed=False, stored=True)
+
+ def get_model(self):
+ return Annotation
+
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/templates/search/indexes/ldt_utils/annotation_text.txt Mon Jul 30 19:46:40 2012 +0200
@@ -0,0 +1,4 @@
+{{object.tags}}
+{{object.title}}
+{{object.description}}
+{{object.text}}
\ No newline at end of file
--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/ldt/ldt/indexation/templates/search/indexes/ldt_utils/segment_text.txt Mon Jul 30 19:46:40 2012 +0200
@@ -0,0 +1,6 @@
+--*t-a-g-s*--
+{{object.tags}}
+--*t-i-t-l-e*--
+{{object.title}}
+--*a-b-s-t-r-a-c-t*--
+{{object.abstract}}
\ No newline at end of file
--- a/src/ldt/ldt/ldt_utils/admin.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/admin.py Mon Jul 30 19:46:40 2012 +0200
@@ -7,7 +7,6 @@
from ldt.ldt_utils.forms import LdtImportForm, ReindexForm, StatAnnotationForm
from ldt.ldt_utils.models import Content, Project, Media, Author
from ldt.ldt_utils.stat import update_stat_content
-import ldt.indexation
from guardian.admin import GuardedModelAdmin
@@ -55,20 +54,15 @@
if request.method == "POST":
form = ReindexForm(request.POST)
if form.is_valid():
- # try:
- writer = ldt.indexation.get_writer(True)
- try:
- contentList = form.cleaned_data["contents"]
- indexer = ContentIndexer(contentList, writer)
+ contentList = form.cleaned_data["contents"]
+ indexer = ContentIndexer(contentList)
+ indexer.index_all()
+
+ index_projects = form.cleaned_data["index_projects"]
+ if index_projects:
+ projectList = Project.objects.filter(contents__in=contentList, state=2).distinct() #filter(contents__in=contentList) @UndefinedVariable
+ indexer = ProjectIndexer(projectList)
indexer.index_all()
-
- index_projects = form.cleaned_data["index_projects"]
- if index_projects:
- projectList = Project.objects.filter(contents__in=contentList, state=2).distinct() #filter(contents__in=contentList) @UndefinedVariable
- indexer = ProjectIndexer(projectList, writer)
- indexer.index_all()
- finally:
- writer.close()
message = "Indexation ok : " + repr(form.cleaned_data["contents"])
form = ReindexForm()
# except Exception, inst:
--- a/src/ldt/ldt/ldt_utils/contentindexer.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/contentindexer.py Mon Jul 30 19:46:40 2012 +0200
@@ -4,7 +4,6 @@
from ldt.ldt_utils.models import Segment, Content, Project
from ldt.ldt_utils.utils import reduce_text_node
from ldt.ldt_utils.stat import update_stat_project
-import ldt.indexation
import lxml.etree
import urllib #@UnresolvedImport
# import ldt.utils.log
@@ -15,9 +14,8 @@
class LdtIndexer(object):
- def __init__(self, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
+ def __init__(self, decoupage_blackList=settings.DECOUPAGE_BLACKLIST):
self.__decoupage_blacklist = decoupage_blackList
- self.__writer = writer
@Property
def decoupage_blacklist(): #@NoSelf
@@ -36,12 +34,6 @@
return locals()
- @Property
- def writer(): #@NoSelf
- def fget(self):
- return self.__writer
- return locals()
-
def index_all(self):
raise NotImplemented
@@ -109,28 +101,11 @@
seg.polemics = seg.get_polemic(polemics)
seg.save()
- ldt.indexation.add_document([
- ("type_doc", "annotation", False, "NOT_ANALYZED"),
- ("iri_id", content.iri_id, True, "NOT_ANALYZED"),
- ("project_id", ldt_id, True, "NOT_ANALYZED"),
- ("ensemble_id", ensembleId, True, "NO"),
- ("decoupage_id", decoupId, True, "NO"),
- ("element_id", elementId, True, "NO"),
- ("tags", tags, True, "ANALYZED"),
- ("title", title, True, "ANALYZED"),
- ("abstract", abstract, True, "ANALYZED"),
- ("all", " ".join([tags, title, abstract]), True, "ANALYZED"),
- ("begin", str(start_ts), True, "NOT_ANALYZED"),
- ("duration", str(duration), True, "NOT_ANALYZED"),
- ("author", author, True, "ANALYZED"),
- ], self.writer)
-
-
class ContentIndexer(LdtIndexer):
- def __init__(self, contentList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
- super(ContentIndexer, self).__init__(writer, decoupage_blackList)
+ def __init__(self, contentList, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
+ super(ContentIndexer, self).__init__(decoupage_blackList)
self.__contentList = contentList
self.__callback = callback
@@ -140,27 +115,23 @@
self.__callback(i,content)
self.index_content(content)
- def index_content(self, content):
-
+ def index_content(self, content):
url = content.iri_url()
filepath = urllib.urlopen(url)
doc = lxml.etree.parse(filepath) #@UndefinedVariable
- ldt.indexation.delete_document("iri_id", content.iri_id, self.writer)
Segment.objects.filter(iri_id=content.iri_id).delete() #@UndefinedVariable
res = doc.xpath("/iri/body/ensembles/ensemble")
for ensemble in res:
self.index_ensemble(ensemble, content)
-
- self.writer.commit()
-
+
class ProjectIndexer(LdtIndexer):
- def __init__(self, projectList, writer, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
- super(ProjectIndexer, self).__init__(writer, decoupage_blackList)
+ def __init__(self, projectList, decoupage_blackList=settings.DECOUPAGE_BLACKLIST, callback=None):
+ super(ProjectIndexer, self).__init__(decoupage_blackList)
self.__projectList = projectList
self.__callback = callback
@@ -176,7 +147,6 @@
# pocketfilms.utils.log.debug("Indexing project : "+str(project.iri_id))
doc = lxml.etree.fromstring(project.ldt_encoded) #@UndefinedVariable
- ldt.indexation.delete_document("project_id", project.ldt_id, self.writer)
Segment.objects.filter(project_obj__ldt_id=project.ldt_id).delete() #@UndefinedVariable
res = doc.xpath("/iri/annotations/content")
@@ -191,22 +161,15 @@
for ensemble in content.getchildren():
self.index_ensemble(ensemble, content_obj, project)
-
- self.writer.commit()
@receiver(post_save, sender=Project)
def index_project(sender, **kwargs):
if settings.AUTO_INDEX_AFTER_SAVE:
instance = kwargs['instance']
- writer = ldt.indexation.get_writer()
- try:
- if instance.state != 2:
- ldt.indexation.delete_document("project_id", instance.ldt_id, writer)
- Segment.objects.filter(project_obj__ldt_id=instance.ldt_id).delete() #@UndefinedVariable
- update_stat_project(instance)
- else:
- projectIndexer = ProjectIndexer([instance], writer)
- projectIndexer.index_all()
- update_stat_project(instance)
- finally:
- writer.close()
+ if instance.state != 2:
+ Segment.objects.filter(project_obj__ldt_id=instance.ldt_id).delete() #@UndefinedVariable
+ update_stat_project(instance)
+ else:
+ projectIndexer = ProjectIndexer([instance])
+ projectIndexer.index_all()
+ update_stat_project(instance)
--- a/src/ldt/ldt/ldt_utils/models.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/models.py Mon Jul 30 19:46:40 2012 +0200
@@ -178,11 +178,7 @@
return locals()
mimetype = property(**mimetype())
-
- def delete(self):
- super(Content, self).delete()
- ldt.indexation.delete_document("iri_id",self.iri_id)
-
+
def sync_iri_file(self):
# create iri file if needed
created = False
--- a/src/ldt/ldt/ldt_utils/utils.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/utils.py Mon Jul 30 19:46:40 2012 +0200
@@ -1,6 +1,6 @@
from copy import deepcopy
from django.conf import settings
-from ldt.indexation import get_searcher, get_results_list
+from ldt.indexation import get_results_list
from django.utils.translation import ugettext as _
from StringIO import StringIO
import datetime
@@ -40,14 +40,11 @@
class LdtSearch(object):
def query(self, field, query):
- indexSearcher = get_searcher()
hits = get_results_list(field, query)
res = []
for hit in hits:
- doc = indexSearcher.doc(hit.doc)
- res.append({"iri_id":doc.get("iri_id"), "ensemble_id":doc.get("ensemble_id"), "decoupage_id":doc.get("decoupage_id"), "element_id":doc.get("element_id"), "project_id":doc.get("project_id")})
- indexSearcher.close()
+ res.append(hit.get_stored_fields())
return res
def query_all(self, query):
--- a/src/ldt/ldt/ldt_utils/views/workspace.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/ldt_utils/views/workspace.py Mon Jul 30 19:46:40 2012 +0200
@@ -212,12 +212,13 @@
valid_segments = []
for s in all_related_segments:
- segment = [seg for seg in all_segments if seg.element_id == s['element_id'] and seg.project_id == s['project_id'] and seg.iri_id == s['iri_id'] and seg.cutting_id == s['decoupage_id'] and seg.ensemble_id == s['ensemble_id'] ][0]
+ segment = [seg for seg in all_segments if seg.element_id == s['element_id'] and seg.project_id == s['project_id'] and seg.iri_id == s['iri_id'] and seg.cutting_id == s['cutting_id'] and seg.ensemble_id == s['ensemble_id'] ][0]
segment.score = s['score']
segment.indexation_id = s['indexation_id']
segment.context = s['context']
segment.context_tags = s['tags']
+ segment.highlighted = s['highlighted']
if not s['project_id']:
segment.project_id = '_'
--- a/src/ldt/ldt/management/commands/reindex.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/management/commands/reindex.py Mon Jul 30 19:46:40 2012 +0200
@@ -1,5 +1,4 @@
from django.core.management.base import BaseCommand
-import ldt.indexation
from ldt.ldt_utils.models import Content, Project
from ldt.ldt_utils.contentindexer import ContentIndexer, ProjectIndexer
from ldt.management.utils import show_progress
@@ -22,9 +21,7 @@
def handle(self, *args, **options):
parser = self.create_parser("reindex", "")
options, _ = parser.parse_args()
-
- writer = ldt.indexation.get_writer(True)
-
+
if options.content_id:
self.stdout.write('Creating index for %s\n' % options.content_id)
contentList = Content.objects.filter(iri_id=options.content_id)
@@ -36,7 +33,7 @@
c = lambda i,o: show_progress(i+1, count, o.title, 50)
- indexer = ContentIndexer(contentList, writer, callback=c)
+ indexer = ContentIndexer(contentList, callback=c)
indexer.index_all()
if options.projects:
@@ -44,7 +41,5 @@
projectList = Project.objects.filter(contents__in=contentList, state=2).distinct()
count = projectList.count()
c = lambda i,o: show_progress(i+1, count, o.title, 50)
- indexer = ProjectIndexer(projectList, writer, callback=c)
+ indexer = ProjectIndexer(projectList, callback=c)
indexer.index_all()
-
- writer.close()
--- a/src/ldt/ldt/settings.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/settings.py Mon Jul 30 19:46:40 2012 +0200
@@ -20,12 +20,14 @@
'django.contrib.admin',
'registration',
'tagging',
+ 'haystack',
'ldt',
'ldt.core',
'ldt.ldt_utils',
'ldt.text',
'ldt.user',
'ldt.management',
+ 'ldt.indexation',
'oauth_provider',
'django_openid_consumer',
'piston',
@@ -96,4 +98,10 @@
DEFAULT_USER_ICON = "thumbnails/users/user_default_icon.png"
DEFAULT_GROUP_ICON = "thumbnails/groups/group_default_icon.png"
-EXTERNAL_STREAM_SRC = getattr(settings, 'EXTERNAL_STREAM_SRC', ['youtube.com', 'dailymotion.com'])
\ No newline at end of file
+EXTERNAL_STREAM_SRC = getattr(settings, 'EXTERNAL_STREAM_SRC', ['youtube.com', 'dailymotion.com'])
+
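+# Default to haystack's SimpleEngine: a basic, database-backed search backend
+# that needs no external search server.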
+HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
+ },
+}
--- a/src/ldt/ldt/text/annotindexer.py Mon Jul 23 17:05:53 2012 +0200
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,36 +0,0 @@
-import ldt.indexation
-
-class AnnotIndexer(object):
-
- def __init__(self, annotList, writer):
- self.__annotList = annotList
- self.__writer = writer
-
-
- def index_all(self):
- for annot in self.__annotList:
- self.index_annotation(annot)
-
-
- def index_annotation(self, annotation):
-
-
- annottags = annotation.get_tag_list()
- tags = ""
-
- if annottags is None or len(annottags) == 0:
- tags = ""
- else:
- for tag in annottags:
- tags += tag + ";"
-
- ldt.indexation.add_document([
- ("annotation_id", annotation.external_id, True, "NOT_ANALYZED"),
- ("type_doc", "text-annotation", False, "NOT_ANALYZED"),
- ("tags", tags, False, "ANALYZED"),
- ("title", annotation.title, False, "ANALYZED"),
- ("abstract", annotation.description, False, "ANALYZED"),
- ("text", annotation.text, False, "ANALYZED"),
- ("all", " ".join([tags, annotation.title, annotation.description, annotation.text]), False, "ANALYZED"),
- ], self.__writer)
-
--- a/src/ldt/ldt/text/models.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/text/models.py Mon Jul 30 19:46:40 2012 +0200
@@ -1,9 +1,7 @@
-from annotindexer import AnnotIndexer
from django.db import models
from django.utils.translation import ugettext_lazy as _
from tagging.models import Tag
from utils import generate_uuid
-import ldt.indexation
import lxml
import tagging.fields
#from django.core.management.validation import max_length
@@ -117,26 +115,8 @@
def create_annotation(external_id, uri=None, tags=None, title=None, description=None, text=None, color=None, creator=None, contributor=None, creation_date=None, update_date=None):
annotation = Annotation(external_id=external_id, uri=uri, tags=tags, title=title, description=description, text=text, color=color, creator=creator, contributor=contributor, creation_date=creation_date, update_date=update_date)
annotation.save()
- annotation.index_annot()
return annotation
-
- def delete(self):
- super(Annotation, self).delete()
- ldt.indexation.delete_document("external_id", self.external_id)
-
- def index_annot(self):
- writer = ldt.indexation.get_writer()
- try:
- annotl = [self, ]
- indexer = AnnotIndexer(annotl, writer)
- indexer.index_all()
- finally:
- writer.close()
-
- def update_index(self):
- ldt.indexation.delete_document("external_id", self.external_id)
- self.index_annot()
--- a/src/ldt/ldt/text/views.py Mon Jul 23 17:05:53 2012 +0200
+++ b/src/ldt/ldt/text/views.py Mon Jul 30 19:46:40 2012 +0200
@@ -192,7 +192,6 @@
annot.update_date = unicode(update_date[0])
annot.save()
- annot.update_index()
return HttpResponse(lxml.etree.tostring(annot.serialize(), pretty_print=True), mimetype="text/xml;charset=utf-8")
--- a/virtualenv/res/lib/lib_create_env.py Mon Jul 23 17:05:53 2012 +0200
+++ b/virtualenv/res/lib/lib_create_env.py Mon Jul 30 19:46:40 2012 +0200
@@ -35,13 +35,13 @@
'SSH': {'setup': 'ssh', 'url':'http://pypi.python.org/packages/source/s/ssh/ssh-1.7.14.tar.gz#md5=4cdd0549ef4699bd67b96264d3b21427', 'local':'ssh-1.7.14.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
'FABRIC': {'setup': 'fabric', 'url':'https://github.com/fabric/fabric/tarball/1.4.2', 'local':'fabric-1.4.2.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
'MERCURIAL': {'setup': 'mercurial', 'url':'http://mercurial.selenic.com/release/mercurial-2.2.2.tar.gz', 'local':'mercurial-2.2.2.tar.gz', 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
+ 'HAYSTACK': {'setup': 'django-haystack', 'url': 'https://github.com/toastdriven/django-haystack/tarball/master', 'local': 'django-haystack-v2.0.0.tar.gz', 'install':{'method':'pip', 'option_str': None, 'dict_extra_env': None}},
+ 'REQUEST': {'setup': 'requests', 'url':'https://github.com/kennethreitz/requests/tarball/v0.13.3', 'local':'requests-v0.13.3.tar.gz', 'install' : {'method':'pip', 'option_str': None, 'dict_extra_env': None}},
}
if system_str == 'Windows':
URLS.update({
'PSYCOPG2': {'setup': 'psycopg2','url': 'psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip', 'local':"psycopg2-2.0.14.win32-py2.6-pg8.4.3-release.zip", 'install': {'method': 'install_psycopg2', 'option_str': None, 'dict_extra_env': None}},
- 'JCC': {'setup': 'jcc', 'url': 'http://pylucene-win32-binary.googlecode.com/files/JCC-2.6-py2.6-win32.egg', 'local':"JCC-2.6-py2.6-win32.egg", 'install': {'method': 'easy_install', 'option_str': None, 'dict_extra_env': None}},
- 'PYLUCENE': {'setup': 'pylucene', 'url': 'http://pylucene-win32-binary.googlecode.com/files/lucene-3.0.2-py2.6-win32.egg', 'local':"lucene-3.0.2-py2.6-win32.egg", 'install': {'method': 'easy_install', 'option_str': None, 'dict_extra_env': None}},
'PIL': {'setup': 'pil', 'url': 'http://effbot.org/media/downloads/PIL-1.1.7.win32-py2.6.exe', 'local':"PIL-1.1.7.win32-py2.6.exe", 'install': {'method': 'easy_install', 'option_str': None, 'dict_extra_env': None}},
'LXML': {'setup': 'lxml', 'url': 'http://pypi.python.org/packages/2.6/l/lxml/lxml-2.2.2-py2.6-win32.egg', 'local':"lxml-2.2.2-py2.6-win32.egg", 'install': {'method': 'easy_install', 'option_str': None, 'dict_extra_env': None}}
})
@@ -55,7 +55,6 @@
URLS.update({
'PSYCOPG2': {'setup': 'psycopg2','url': 'http://www.psycopg.org/psycopg/tarballs/PSYCOPG-2-4/psycopg2-2.4.5.tar.gz', 'local':"psycopg2-2.4.5.tar.gz", 'install': {'method': 'pip', 'option_str': None, 'dict_extra_env': None}},
- 'PYLUCENE': {'setup': 'pylucene', 'url': 'http://mirrors.ircam.fr/pub/apache/lucene/pylucene/pylucene-3.6.0-2-src.tar.gz', 'local':"pylucene-3.6.0-2-src.tar.gz", 'install': {'method': 'install_pylucene', 'option_str': None, 'dict_extra_env': None}},
'PIL': {'setup': 'pil', 'url': 'http://effbot.org/downloads/Imaging-1.1.7.tar.gz', 'local':"Imaging-1.1.7.tar.gz", 'install': {'method': 'easy_install', 'option_str': None, 'dict_extra_env': None}},
'LXML': {'setup': 'lxml', 'url':"lxml-2.3.4.tar.bz2", 'local':"lxml-2.3.4.tar.bz2", 'install': {'method': lxml_method, 'option_str': None, 'dict_extra_env': lxml_options}},
})
Binary file virtualenv/res/src/JCC-2.6-py2.6-win32.egg has changed
Binary file virtualenv/res/src/django-haystack-v2.0.0.tar.gz has changed
Binary file virtualenv/res/src/lucene-3.0.2-py2.6-win32.egg has changed
Binary file virtualenv/res/src/pyelasticsearch.tar.gz has changed
Binary file virtualenv/res/src/pylucene-3.6.0-2-src.tar.gz has changed
Binary file virtualenv/res/src/requests-v0.13.3.tar.gz has changed
--- a/virtualenv/web/res/res_create_env.py Mon Jul 23 17:05:53 2012 +0200
+++ b/virtualenv/web/res/res_create_env.py Mon Jul 30 19:46:40 2012 +0200
@@ -7,7 +7,6 @@
INSTALLS = [ #(key,method, option_str, dict_extra_env)
'LXML',
- 'PYLUCENE',
'PSYCOPG2',
'SOUTH',
'PIL',
@@ -26,9 +25,6 @@
'SORL_THUMBNAIL',
]
-if system_str == 'Windows':
- INSTALLS.insert(0, 'JCC')
-
if system_str == "Linux":
INSTALLS.insert(2, 'DISTRIBUTE')
--- a/web/ldtplatform/config.py.tmpl Mon Jul 23 17:05:53 2012 +0200
+++ b/web/ldtplatform/config.py.tmpl Mon Jul 30 19:46:40 2012 +0200
@@ -98,5 +98,11 @@
FRONT_TAG_LIST = [u"Enmi 2011", u"film", u"conférence"]
+HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
+ },
+}
+
--- a/web/ldtplatform/settings.py Mon Jul 23 17:05:53 2012 +0200
+++ b/web/ldtplatform/settings.py Mon Jul 30 19:46:40 2012 +0200
@@ -129,6 +129,7 @@
'django.contrib.messages',
'django.contrib.admin',
'django.contrib.staticfiles',
+ 'haystack',
'ldtplatform',
'registration',
'tagging',
@@ -138,6 +139,7 @@
'ldt.text',
'ldt.user',
'ldt.management',
+ 'ldt.indexation',
'oauth_provider',
'django_openid_consumer',
'piston',
@@ -231,6 +233,12 @@
EXTERNAL_STREAM_SRC = ['youtube.com', 'dailymotion.com']
+HAYSTACK_CONNECTIONS = {
+ 'default': {
+ 'ENGINE': 'haystack.backends.simple_backend.SimpleEngine',
+ },
+}
+
from config import *
if not "LOGIN_URL" in locals():