web/ldt/text/utils.py
author wakimd
Tue, 23 Nov 2010 17:54:36 +0100
changeset 21 1a061f244254
parent 9 22ab430e9b64
child 24 9e19b7ae3780
permissions -rw-r--r--
Pylucene indexation
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
9
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     1
import uuid
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     2
import django.core.urlresolvers
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     3
from django.conf import settings
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     4
from ldt.text.models import *
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     5
import urllib
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     6
import datetime
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     7
import lxml.etree
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     8
import base64
21
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
     9
import lucene
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    10
from ldt.ldt_utils import STORE
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    11
from ldt.ldt_utils import ANALYZER
9
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    12
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    13
# Lower-cased textual spellings recognised by boolean_convert().
__BOOLEAN_DICT = {
    'false': False,
    'true': True,
    '0': False,
    '1': True,
    't': True,
    'f': False,
}


def boolean_convert(bool):
    """Convert a loosely-typed value to a real boolean.

    ``None`` maps to ``False`` and actual booleans are returned unchanged.
    Any other value is converted to text, stripped of surrounding
    whitespace, lower-cased and looked up in ``__BOOLEAN_DICT``.

    :param bool: the value to interpret (the name shadows the builtin but
        is kept because it is part of the function's signature).
    :returns: ``True`` for 'true', 't' or '1' (case-insensitive);
        ``False`` for everything else, including unrecognised text.
    """
    value = bool
    if value is None:
        return False
    # Identity checks rather than isinstance(): the builtin ``bool`` is
    # hidden by the parameter name inside this function.
    if value is True or value is False:
        return value
    try:
        text = str(value)
    except UnicodeError:
        # On Python 2, str() of non-ASCII unicode text raises; such text
        # can never match one of the recognised literals.
        return False
    # Tolerate padding/newlines around the literal (e.g. "true\n").
    return __BOOLEAN_DICT.get(text.strip().lower(), False)
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    29
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    30
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    31
def generate_uuid():
    """Return a freshly generated version-1 UUID as a text string.

    The value is the canonical hyphenated form produced by
    ``uuid.uuid1()``. Formatting through a unicode literal yields a
    ``unicode`` object on Python 2 (as before) and a ``str`` on Python 3,
    without relying on the Python-2-only ``unicode`` builtin.
    """
    return u"%s" % uuid.uuid1()
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    33
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    34
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    35
def normalize_tags(list):
    """Lower-case the given tags and drop duplicates.

    :param list: an iterable of tag strings (the name shadows the builtin
        but is kept because it is part of the function's signature).
    :returns: a new list of the distinct lower-cased tags, in order of
        first appearance. Always a real list, so the result is
        deterministic and indexable on every Python version.
    """
    seen = set()
    unique_tags = []
    for tag in list:
        lowered = tag.lower()
        if lowered not in seen:
            seen.add(lowered)
            unique_tags.append(lowered)
    return unique_tags
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    43
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    44
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    45
def create_empty_annotation():
    """Build the XML skeleton of an empty text annotation.

    The resulting tree has this shape (all elements empty)::

        <iri>
          <text-annotation>
            <id/> <uri/> <tags/>
            <content> <color/> <description/> <title/> <text/> </content>
            <meta> <contributor/> <creator/> <created/> <modified/> </meta>
          </text-annotation>
        </iri>

    :returns: an ``lxml.etree.ElementTree`` whose root is the ``iri``
        element.
    """
    sub_element = lxml.etree.SubElement

    iri = lxml.etree.Element('iri')
    doc = lxml.etree.ElementTree(iri)
    textannotation = sub_element(iri, 'text-annotation')

    # Children are appended in document order; keep the tuples ordered.
    for tag in ('id', 'uri', 'tags'):
        sub_element(textannotation, tag)

    content = sub_element(textannotation, 'content')
    for tag in ('color', 'description', 'title', 'text'):
        sub_element(content, tag)

    meta = sub_element(textannotation, 'meta')
    for tag in ('contributor', 'creator', 'created', 'modified'):
        sub_element(meta, tag)

    return doc
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    67
21
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    68
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    69
class LdtSearch(object):
    """Run queries against the Lucene index opened from ``STORE``."""

    def query(self, field, query):
        """Search the index and return the matching documents' summaries.

        The query string is parsed with a Lucene ``QueryParser`` using a
        French analyzer, ``field`` as the default field and AND as the
        default operator between terms.

        :param field: name of the default index field to search.
        :param query: query string in Lucene query-parser syntax.
        :returns: a list of dicts with the keys ``"external_id"`` and
            ``"title"``, one per hit; the search is asked for at most
            ``settings.LDT_MAX_SEARCH_NUMBER`` hits.
        :raises: whatever PyLucene raises on an unparsable query or an
            index error; the searcher is closed in every case.
        """
        indexSearcher = lucene.IndexSearcher(STORE)
        try:
            queryParser = lucene.QueryParser(lucene.Version.LUCENE_30, field, lucene.FrenchAnalyzer(lucene.Version.LUCENE_30))
            queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
            queryObj = queryParser.parse(query)
            hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)

            res = []
            for hit in hits.scoreDocs:
                doc = indexSearcher.doc(hit.doc)
                res.append({"external_id":doc.get("external_id"),"title":doc.get("title")})
            return res
        finally:
            # Release the searcher even when parsing or searching fails.
            indexSearcher.close()

    def queryAll(self, query):
        """Search the ``"all"`` field; see :meth:`query` for the result format.

        NOTE(review): presumably ``"all"`` is a catch-all field filled at
        indexing time -- confirm against the indexing code.
        """
        return self.query("all", query)
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    87
    
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    88