web/ldt/text/utils.py
author wakimd
Tue, 23 Nov 2010 17:54:36 +0100
changeset 21 1a061f244254
parent 9 22ab430e9b64
child 24 9e19b7ae3780
permissions -rw-r--r--
Pylucene indexation

import uuid
import django.core.urlresolvers
from django.conf import settings
from ldt.text.models import *
import urllib
import datetime
import lxml.etree
import base64
import lucene
from ldt.ldt_utils import STORE
from ldt.ldt_utils import ANALYZER

# Lower-cased textual spellings that boolean_convert() recognises, grouped
# by the boolean value they stand for.
__BOOLEAN_DICT = {
    'true': True, '1': True, 't': True,
    'false': False, '0': False, 'f': False,
}


def boolean_convert(bool):
    """Interpret ``bool`` as a boolean value.

    ``True`` and ``False`` are returned unchanged and ``None`` yields
    ``False``.  Any other value is converted with ``str()``, lower-cased and
    looked up among the recognised spellings ('true', 'false', '1', '0',
    't', 'f'); anything that is not recognised yields ``False``.
    """
    # Identity tests on purpose: the integers 0 and 1 are not the boolean
    # singletons and therefore go through the textual lookup below.
    if bool is True or bool is False:
        return bool
    if bool is None:
        return False
    return __BOOLEAN_DICT.get(str(bool).lower(), False)


def generate_uuid():
    """Return a newly generated version-1 UUID as a text string.

    The value has the canonical 36-character hyphenated form produced by
    converting ``uuid.uuid1()`` to text.
    """
    # Formatting through a unicode literal yields a unicode object on
    # Python 2 (same result as unicode(...)) and a str on Python 3, where the
    # ``unicode`` builtin no longer exists.
    return u"%s" % uuid.uuid1()


def normalize_tags(list):
    """Lower-case every tag and drop duplicates.

    ``list`` is an iterable of strings.  The result is a new list holding
    each distinct lower-cased tag once, in the order of its first
    occurrence in the input.  An empty input gives an empty list.
    """
    # Track membership in a set but collect into a list, so the result is
    # always a real list with a stable, predictable order (de-duplicating
    # through dict keys gave an arbitrary order, and a view object rather
    # than a list on Python 3).
    seen = set()
    unique_tags = []
    for tag in list:
        lowered = tag.lower()
        if lowered not in seen:
            seen.add(lowered)
            unique_tags.append(lowered)
    return unique_tags


def create_empty_annotation():
    """Build the empty XML skeleton of a text annotation.

    Returns an ``lxml.etree.ElementTree`` whose root ``iri`` element holds a
    single ``text-annotation`` element with this (empty) structure::

        text-annotation
            id, uri, tags
            content: color, description, title, text
            meta: contributor, creator, created, modified
    """
    add_child = lxml.etree.SubElement

    root = lxml.etree.Element('iri')
    annotation = add_child(root, 'text-annotation')

    # Children are appended in document order, so the sequence of the calls
    # below defines the layout of the resulting XML.
    for tag_name in ('id', 'uri', 'tags'):
        add_child(annotation, tag_name)

    content_node = add_child(annotation, 'content')
    for tag_name in ('color', 'description', 'title', 'text'):
        add_child(content_node, tag_name)

    meta_node = add_child(annotation, 'meta')
    for tag_name in ("contributor", "creator", "created", "modified"):
        add_child(meta_node, tag_name)

    return lxml.etree.ElementTree(root)


class LdtSearch(object):
    """Query helper running searches through a Lucene searcher on ``STORE``."""

    def query(self, field, query):
        """Search ``field`` for ``query`` and return the matching documents.

        The query string is parsed with a French analyzer, and its terms are
        combined with AND by default.  At most
        ``settings.LDT_MAX_SEARCH_NUMBER`` hits are retrieved.

        Returns a list with one dict per hit, each holding the
        ``external_id`` and ``title`` values read from the matching
        document.  Errors raised while parsing or searching propagate to
        the caller.
        """
        indexSearcher = lucene.IndexSearcher(STORE)
        # try/finally so the searcher is released even when the query string
        # cannot be parsed or the search itself fails.
        try:
            queryParser = lucene.QueryParser(lucene.Version.LUCENE_30, field, lucene.FrenchAnalyzer(lucene.Version.LUCENE_30))
            queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
            queryObj = queryParser.parse(query)
            hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)

            res = []
            for hit in hits.scoreDocs:
                doc = indexSearcher.doc(hit.doc)
                res.append({"external_id":doc.get("external_id"),"title":doc.get("title")})
            return res
        finally:
            indexSearcher.close()

    def queryAll(self, query):
        """Run ``query`` against the ``"all"`` field; see ``query``."""
        return self.query("all", query)