web/ldt/text/utils.py
author ymh <ymh.work@gmail.com>
Thu, 16 Dec 2010 15:00:30 +0100
changeset 24 9e19b7ae3780
parent 22 83b28fc0d731
parent 21 1a061f244254
permissions -rw-r--r--
Merge with 1a061f24425462b0267eece1cdc970001580be24
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
22
83b28fc0d731 improve on ldt test framework
ymh <ymh.work@gmail.com>
parents: 9
diff changeset
     1
from django.conf import settings
24
9e19b7ae3780 Merge with 1a061f24425462b0267eece1cdc970001580be24
ymh <ymh.work@gmail.com>
parents: 22 21
diff changeset
     2
from ldt.ldt_utils import ANALYZER, STORE
22
83b28fc0d731 improve on ldt test framework
ymh <ymh.work@gmail.com>
parents: 9
diff changeset
     3
import base64
9
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     4
import django.core.urlresolvers
24
9e19b7ae3780 Merge with 1a061f24425462b0267eece1cdc970001580be24
ymh <ymh.work@gmail.com>
parents: 22 21
diff changeset
     5
import lucene
22
83b28fc0d731 improve on ldt test framework
ymh <ymh.work@gmail.com>
parents: 9
diff changeset
     6
import lxml.etree
9
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     7
import urllib
22
83b28fc0d731 improve on ldt test framework
ymh <ymh.work@gmail.com>
parents: 9
diff changeset
     8
import uuid
9
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
     9
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    10
# Lower-cased textual spellings recognised by boolean_convert().
__BOOLEAN_DICT = {
    'false': False,
    'true': True,
    '0': False,
    '1': True,
    't': True,
    'f': False,
}


def boolean_convert(bool):
    """Convert a loosely typed value to a real boolean.

    ``None`` yields ``False`` and actual ``True``/``False`` objects are
    returned unchanged.  Any other value is converted with ``str()``,
    lower-cased and looked up in ``__BOOLEAN_DICT``; values that are not
    listed there yield ``False``.
    """
    if bool is None:
        return False
    if bool is True or bool is False:
        return bool
    try:
        key = str(bool).lower()
    except UnicodeError:
        # On Python 2, str() of a non-ASCII unicode value raises
        # UnicodeEncodeError.  Such a value cannot match any key of the
        # table, so it gets the same answer as every other unknown value.
        return False
    return __BOOLEAN_DICT.get(key, False)
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    26
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    27
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    28
def generate_uuid():
    """Return a freshly generated version 1 UUID as a text string."""
    try:
        text_type = unicode  # Python 2: keep returning a unicode object
    except NameError:
        text_type = str  # Python 3: str is already the text type
    return text_type(uuid.uuid1())
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    30
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    31
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    32
def normalize_tags(list):
    """Return the lower-cased tags of ``list`` without duplicates.

    The result is always a new list.  Tags keep the order in which they
    first appear, so the output is deterministic for a given input.
    """
    seen = set()
    unique_tags = []
    for tag in list:
        lowered = tag.lower()
        # Compare after lower-casing so that "Tag" and "tag" collapse.
        if lowered not in seen:
            seen.add(lowered)
            unique_tags.append(lowered)
    return unique_tags
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    40
22ab430e9b64 Corrections on models and general structure
wakimd
parents:
diff changeset
    41
21
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    42
class LdtSearch(object):
    """Run text queries against the Lucene index referenced by ``STORE``."""

    def query(self, field, query):
        """Search ``field`` for ``query`` and return the matching documents.

        The query string is parsed with a French analyzer and its terms
        are combined with AND by default.  The number of hits is capped by
        ``settings.LDT_MAX_SEARCH_NUMBER``.

        Returns a list of dicts with the keys ``"external_id"`` and
        ``"title"``, read from each matching document.
        """
        indexSearcher = lucene.IndexSearcher(STORE)
        # Parsing a malformed query or running the search can raise; the
        # searcher must still be closed in that case.
        try:
            queryParser = lucene.QueryParser(lucene.Version.LUCENE_30, field, lucene.FrenchAnalyzer(lucene.Version.LUCENE_30))
            queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
            queryObj = queryParser.parse(query)
            hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)

            res = []
            for hit in hits.scoreDocs:
                doc = indexSearcher.doc(hit.doc)
                res.append({"external_id": doc.get("external_id"), "title": doc.get("title")})
            return res
        finally:
            indexSearcher.close()

    def queryAll(self, query):
        """Search the ``"all"`` field for ``query``; see :meth:`query`."""
        return self.query("all", query)
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    60
    
1a061f244254 Pylucene indexation
wakimd
parents: 9
diff changeset
    61