src/ldt/ldt/text/utils.py
author wakimd
Fri, 11 Feb 2011 11:51:35 +0100
changeset 22 03d02cf0bea7
child 63 93325a5d61f0
permissions -rw-r--r--
Added text API, test Clients and Testcases
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
22
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     1
from django.conf import settings
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     2
from ldt.ldt_utils import ANALYZER, STORE
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     3
import base64
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     4
import django.core.urlresolvers
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     5
import lucene
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     6
import lxml.etree
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     7
import urllib
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     8
import uuid
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     9
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    10
# Lower-cased textual spellings that boolean_convert() recognises.
__BOOLEAN_DICT = {
    'false': False,
    'true': True,
    '0': False,
    '1': True,
    't': True,
    'f': False,
}


def boolean_convert(bool):
    """Coerce a loosely-typed flag into a real boolean.

    ``None`` yields ``False``; ``True`` and ``False`` are returned as is.
    Any other value is converted with ``str()``, lower-cased and looked up
    in ``__BOOLEAN_DICT``; values that are not in the table yield ``False``.

    The parameter shadows the ``bool`` builtin; the name is kept so that
    callers passing it by keyword keep working.
    """
    if bool is None:
        return False
    if bool is True or bool is False:
        return bool
    try:
        key = str(bool).lower()
    except UnicodeError:
        # On Python 2, str() of a unicode value holding non-ASCII characters
        # raises UnicodeEncodeError. Such text cannot match any key of the
        # table, so treat it like every other unrecognised value.
        return False
    return __BOOLEAN_DICT.get(key, False)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    26
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    27
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    28
def generate_uuid():
    """Return a newly generated version-1 UUID as a unicode string."""
    new_id = uuid.uuid1()
    return unicode(new_id)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    30
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    31
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    32
#def normalize_tags(list):
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    33
#    nlist=[]
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    34
#    for tag in list:
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    35
#        tag = tag.lower()
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    36
#        nlist.append(tag)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    37
#    taglist = dict().fromkeys(nlist).keys()    
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    38
#    
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    39
#    return taglist
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    40
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    41
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    42
class TextSearch(object):
    """Run text queries against the Lucene index stored in ``STORE``."""

    def query(self, field, query):
        """Search ``field`` for ``query`` and return the matching documents.

        The query string is parsed with a ``lucene.FrenchAnalyzer`` and its
        terms are combined with AND by default. At most
        ``settings.LDT_MAX_SEARCH_NUMBER`` hits are requested.

        Returns a list of dicts, one per hit in the order of
        ``hits.scoreDocs``, each holding the stored ``"external_id"`` and
        ``"title"`` values of the document.

        Errors raised while parsing or searching propagate to the caller;
        the index searcher is closed in every case.
        """
        index_searcher = lucene.IndexSearcher(STORE)
        try:
            analyzer = lucene.FrenchAnalyzer(lucene.Version.LUCENE_30)
            query_parser = lucene.QueryParser(lucene.Version.LUCENE_30, field, analyzer)
            query_parser.setDefaultOperator(lucene.QueryParser.Operator.AND)
            query_obj = query_parser.parse(query)
            hits = index_searcher.search(query_obj, settings.LDT_MAX_SEARCH_NUMBER)

            res = []
            for hit in hits.scoreDocs:
                doc = index_searcher.doc(hit.doc)
                res.append({"external_id": doc.get("external_id"), "title": doc.get("title")})
            return res
        finally:
            # Release the searcher even when parsing or searching fails;
            # previously an exception skipped close() and leaked it.
            index_searcher.close()

    def queryAll(self, query):
        """Search the ``"all"`` field for ``query``; see :meth:`query`."""
        return self.query("all", query)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    60
    
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    61