from django.conf import settings
import lucene
# The JVM must be initialised before any other Lucene class is instantiated.
lucene.initVM(lucene.CLASSPATH)

# On-disk index directory; its location comes from the Django settings.
STORE = lucene.SimpleFSDirectory(lucene.File(settings.INDEX_PATH))

# StandardAnalyzer is the wrapper's default; the fields listed below are
# registered with a FrenchAnalyzer instead.
ANALYZER = lucene.PerFieldAnalyzerWrapper(
    lucene.StandardAnalyzer(lucene.Version.LUCENE_CURRENT)
)
for _french_field in ("tags", "title", "abstract", "all"):
    ANALYZER.addAnalyzer(
        _french_field,
        lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT),
    )
# Do not leave the loop variable behind in the module namespace.
del _french_field
def get_results_with_context(field, query, content_list=None):
    """Run a search and return one descriptive dict per retained hit.

    Args:
        field: name of the index field the query is parsed against.
        query: query string handed to the query parser.
        content_list: optional iterable of objects exposing an ``iri_id``
            attribute. When given, only hits whose stored ``iri_id`` equals
            one of them are kept; ``None`` disables the filter.

    Returns:
        A list of dicts, in hit order, with the keys ``iri_id``,
        ``ensemble_id``, ``decoupage_id``, ``element_id``, ``project_id``,
        ``context`` (the stored ``abstract`` field), ``title``, ``tags``,
        ``score``, ``lucene_id``, ``begin`` and ``duration``.
    """
    searcher = get_searcher()
    try:
        hits = get_results_list(field, query, searcher)

        allowed_iri_ids = None
        if content_list is not None:
            # Built once as a set so each hit is checked in constant time
            # instead of rescanning a list for every hit.
            allowed_iri_ids = set(ctt.iri_id for ctt in content_list)

        contexts = []
        for hit in hits:
            doc = searcher.doc(hit.doc)
            if (allowed_iri_ids is not None
                    and doc.get("iri_id") not in allowed_iri_ids):
                continue
            contexts.append({
                "iri_id": doc.get("iri_id"),
                "ensemble_id": doc.get("ensemble_id"),
                "decoupage_id": doc.get("decoupage_id"),
                "element_id": doc.get("element_id"),
                "project_id": doc.get("project_id"),
                "context": doc.getField('abstract').stringValue(),
                "title": doc.getField('title').stringValue(),
                "tags": doc.getField('tags').stringValue(),
                "score": hit.score,
                "lucene_id": hit.doc,
                "begin": doc.getField('begin').stringValue(),
                "duration": doc.getField('duration').stringValue(),
            })
    finally:
        # The searcher is released even when the search or a lookup fails.
        searcher.close()
    return contexts
def get_results_list(field, query, indexSearcher=None):
    """Parse ``query`` against ``field`` and return the matching score docs.

    Args:
        field: default field name given to the query parser.
        query: query string to parse.
        indexSearcher: searcher to run the query on. When ``None`` a searcher
            is obtained from ``get_searcher()`` and closed before returning;
            a searcher supplied by the caller is left open.

    Returns:
        The ``scoreDocs`` attribute of the search result. The search call is
        given ``settings.LDT_MAX_SEARCH_NUMBER`` as its second argument.
    """
    owns_searcher = indexSearcher is None
    if owns_searcher:
        indexSearcher = get_searcher()

    try:
        parsed_query = get_query_parser(field).parse(query)
        top_docs = indexSearcher.search(
            parsed_query, settings.LDT_MAX_SEARCH_NUMBER
        )
    finally:
        # Only close what this function opened itself.
        if owns_searcher:
            indexSearcher.close()

    return top_docs.scoreDocs
def highlight_documents(results_list, query, field):
    """Fill each segment with highlighted title, abstract and tags.

    Args:
        results_list: iterable of mappings whose ``'list'`` entry holds
            segment objects carrying a ``lucene_id`` attribute.
        query: query string; parsed with the parser for ``field``.
        field: default field name given to the query parser.

    Returns:
        ``results_list`` itself. Every segment has ``context``, ``title`` and
        ``context_tags`` set. Matches are wrapped in
        ``<span class="highlight">``; when highlighting yields an empty
        string, the stored field value is used instead.
    """
    index_searcher = get_searcher()
    try:
        french_analyzer = lucene.FrenchAnalyzer(lucene.Version.LUCENE_CURRENT)
        html_formatter = lucene.SimpleHTMLFormatter('<span class="highlight">', '</span>')
        parsed_query = get_query_parser(field).parse(query)
        scorer = lucene.QueryScorer(parsed_query)
        highlighter = lucene.Highlighter(html_formatter, scorer)

        def highlighted(stored_doc, name):
            return get_highlighted_text(stored_doc, french_analyzer, highlighter, name)

        def stored_value(stored_doc, name):
            return stored_doc.getField(name).stringValue()

        all_segments = (seg for project in results_list for seg in project['list'])
        for segment in all_segments:
            stored_doc = index_searcher.doc(segment.lucene_id)

            segment.context = highlighted(stored_doc, 'abstract')
            tag_text = highlighted(stored_doc, 'tags')
            segment.title = highlighted(stored_doc, 'title')

            # An empty highlight result falls back to the stored field text.
            if segment.context == u'':
                segment.context = stored_value(stored_doc, 'abstract')
            if tag_text == u'':
                tag_text = stored_value(stored_doc, 'tags')
            if segment.title == u'':
                segment.title = stored_value(stored_doc, 'title')

            # Keep what follows the first ';' (the whole text when there is none).
            first_separator = tag_text.find(';')
            segment.context_tags = tag_text[first_separator + 1:]
    finally:
        index_searcher.close()
    return results_list
def get_highlighted_text(doc, analyzer, highlighter, field):
    """Return the best highlighted fragments of ``field`` in ``doc``.

    The stored string value of ``field`` is tokenised with ``analyzer`` and
    passed to ``highlighter.getBestFragments`` together with
    ``settings.LDT_MAX_FRAGMENT_PER_SEARCH`` and the separator ``"..."``.
    """
    raw_text = doc.getField(field).stringValue()
    token_stream = analyzer.tokenStream("body", lucene.StringReader(raw_text))
    return highlighter.getBestFragments(
        token_stream,
        raw_text,
        settings.LDT_MAX_FRAGMENT_PER_SEARCH,
        "...",
    )
def get_writer(new=False):
    """Return an ``IndexWriter`` on ``STORE`` that uses ``ANALYZER``.

    Args:
        new: passed through as the writer's third constructor argument
            (presumably the "create a fresh index" flag; confirm against the
            Lucene ``IndexWriter`` API).
    """
    # Attach the calling thread to the JVM before touching Lucene objects.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    unlimited = lucene.IndexWriter.MaxFieldLength.UNLIMITED
    return lucene.IndexWriter(STORE, ANALYZER, new, unlimited)
def get_searcher():
    """Return a new ``IndexSearcher`` opened on ``STORE``.

    The caller is responsible for closing the returned searcher.
    """
    # Attach the calling thread to the JVM before touching Lucene objects.
    vm_env = lucene.getVMEnv()
    vm_env.attachCurrentThread()

    index_searcher = lucene.IndexSearcher(STORE)
    return index_searcher
def get_query_parser(field):
    """Build a ``QueryParser`` for ``field`` with AND as default operator.

    The parser uses a ``FrenchAnalyzer``; both are created with
    ``Version.LUCENE_30``.
    """
    lucene_version = lucene.Version.LUCENE_30
    french_analyzer = lucene.FrenchAnalyzer(lucene_version)

    parser = lucene.QueryParser(lucene_version, field, french_analyzer)
    parser.setDefaultOperator(lucene.QueryParser.Operator.AND)
    return parser
def delete_document(field, value):
    """Delete the documents matching the term ``field``/``value`` and commit.

    The writer obtained from ``get_writer()`` is always closed, whether or
    not the deletion or the commit succeeds.
    """
    index_writer = get_writer()
    try:
        matching_term = lucene.Term(field, value)
        index_writer.deleteDocuments(matching_term)
        index_writer.commit()
    finally:
        index_writer.close()