src/ldt/ldt/text/utils.py
author ymh <ymh.work@gmail.com>
Sun, 01 May 2011 03:30:40 +0200
changeset 77 7923feb2e362
parent 63 93325a5d61f0
child 167 fe00e7302efe
permissions -rw-r--r--
improve indexation
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
22
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     1
from django.conf import settings
77
7923feb2e362 improve indexation
ymh <ymh.work@gmail.com>
parents: 63
diff changeset
     2
from ldt.indexation import STORE
22
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     3
import lucene
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     4
import uuid
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     5
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
     6
# Textual spellings recognised by boolean_convert(), keyed by their
# lower-cased form.
__BOOLEAN_DICT = {
    'false': False,
    'true': True,
    '0': False,
    '1': True,
    't': True,
    'f': False,
}


def boolean_convert(bool):
    """Convert a loosely-typed flag into a real boolean.

    ``None`` yields ``False`` and genuine booleans are returned unchanged.
    Any other value is converted to text, stripped of surrounding
    whitespace, lower-cased and looked up in ``__BOOLEAN_DICT``
    ("true"/"false", "t"/"f", "1"/"0").  Values that are not in the table
    yield ``False``.

    The parameter keeps its historical name (which shadows the builtin)
    so that keyword callers keep working.
    """
    if bool is None:
        return False
    if bool is True or bool is False:
        return bool
    try:
        key = str(bool)
    except UnicodeError:
        # On Python 2, str() of a unicode value holding non-ASCII characters
        # raises.  Such a value cannot match any (ASCII-only) table key, so
        # treat it like every other unrecognised input instead of crashing.
        return False
    # Tolerate padding such as " true" or "1\n".
    return __BOOLEAN_DICT.get(key.strip().lower(), False)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    22
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    23
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    24
def generate_uuid():
    """Return a freshly generated version-1 UUID as a text string.

    The value has the canonical hyphenated form produced by converting a
    ``uuid.uuid1()`` object to text.
    """
    # Formatting through a unicode literal yields ``unicode`` on Python 2
    # (same result as before) and ``str`` on Python 3, without referring to
    # the ``unicode`` builtin that no longer exists on Python 3.
    return u"%s" % uuid.uuid1()
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    26
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    27
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    28
#def normalize_tags(list):
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    29
#    nlist=[]
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    30
#    for tag in list:
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    31
#        tag = tag.lower()
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    32
#        nlist.append(tag)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    33
#    taglist = dict().fromkeys(nlist).keys()    
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    34
#    
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    35
#    return taglist
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    36
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    37
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    38
class TextSearch(object):
    """Run text queries against the Lucene index referenced by ``STORE``."""

    def query(self, field, query):
        """Search ``field`` for ``query`` and return the matching documents.

        The query string is parsed with a ``FrenchAnalyzer`` and the default
        operator between terms is AND.  At most
        ``settings.LDT_MAX_SEARCH_NUMBER`` hits are requested.

        Returns a list with one dict per hit, holding the stored
        ``"external_id"`` and ``"title"`` values of the document.

        Exceptions raised while parsing or searching propagate to the
        caller; the searcher is closed in every case.
        """
        indexSearcher = lucene.IndexSearcher(STORE)
        try:
            analyzer = lucene.FrenchAnalyzer(lucene.Version.LUCENE_30)
            queryParser = lucene.QueryParser(lucene.Version.LUCENE_30, field, analyzer)
            queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
            queryObj = queryParser.parse(query)
            hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)

            # Stored documents must be read while the searcher is still open.
            docs = [indexSearcher.doc(hit.doc) for hit in hits.scoreDocs]
            return [
                {"external_id": doc.get("external_id"), "title": doc.get("title")}
                for doc in docs
            ]
        finally:
            # Previously the searcher was only closed on success, leaking it
            # whenever parsing or searching raised.
            indexSearcher.close()

    def queryAll(self, query):
        """Search the ``"all"`` field for ``query``; see :meth:`query`."""
        return self.query("all", query)
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    56
    
03d02cf0bea7 Added text API, test Clients and Testcases
wakimd
parents:
diff changeset
    57