|
9
|
1 |
import uuid |
|
|
2 |
import django.core.urlresolvers |
|
|
3 |
from django.conf import settings |
|
|
4 |
from ldt.text.models import * |
|
|
5 |
import urllib |
|
|
6 |
import datetime |
|
|
7 |
import lxml.etree |
|
|
8 |
import base64 |
|
21
|
9 |
import lucene |
|
|
10 |
from ldt.ldt_utils import STORE |
|
|
11 |
from ldt.ldt_utils import ANALYZER |
|
9
|
12 |
|
|
|
13 |
# Lower-cased string spellings recognised by boolean_convert(); any spelling
# that is not a key here is treated as False.
__BOOLEAN_DICT = {
    'false': False,
    'true': True,
    '0': False,
    '1': True,
    't': True,
    'f': False,
}


def boolean_convert(bool):
    """Convert a loosely typed value to a real boolean.

    ``None`` yields ``False``; ``True`` and ``False`` are returned as is.
    Any other value is turned into text with ``str()``, stripped of
    surrounding whitespace, lower-cased and looked up in ``__BOOLEAN_DICT``
    ('true', 'false', 't', 'f', '1', '0'). Unrecognised text yields
    ``False``.

    The parameter keeps its historical name ``bool`` (which shadows the
    builtin inside this function) so keyword callers keep working.
    """
    if bool is None:
        return False
    if bool is True or bool is False:
        return bool
    try:
        key = str(bool)
    except UnicodeError:
        # On Python 2, str() of a unicode value holding non-ASCII characters
        # raises UnicodeEncodeError. No recognised spelling contains such
        # characters, so the value cannot be a "true" one.
        return False
    # Tolerate padding such as " true " or "1\n" around a valid spelling.
    return __BOOLEAN_DICT.get(key.strip().lower(), False)
|
|
29 |
|
|
|
30 |
|
|
|
31 |
def generate_uuid():
    """Return a freshly generated version-1 UUID as a unicode string."""
    identifier = uuid.uuid1()
    return unicode(identifier)
|
|
33 |
|
|
|
34 |
|
|
|
35 |
def normalize_tags(list):
    """Return the tags lower-cased and de-duplicated, as a new list.

    Tags are compared after lower-casing, so 'Foo' and 'foo' collapse into a
    single 'foo'. The result keeps the order in which each distinct tag
    first appears in the input, which makes the output deterministic
    (de-duplicating through dict keys gave no ordering guarantee on
    Python 2).

    The parameter keeps its historical name ``list`` (which shadows the
    builtin inside this function) so keyword callers keep working.
    """
    seen = set()
    taglist = []
    for tag in list:
        tag = tag.lower()
        if tag not in seen:
            seen.add(tag)
            taglist.append(tag)
    return taglist
|
|
43 |
|
|
|
44 |
|
|
|
45 |
def create_empty_annotation():
    """Build and return an empty text-annotation XML document.

    The returned lxml ElementTree has an ``iri`` root holding a single
    ``text-annotation`` element whose children are, in order: ``id``,
    ``uri``, ``tags``, ``content`` (with ``color``, ``description``,
    ``title``, ``text``) and ``meta`` (with ``contributor``, ``creator``,
    ``created``, ``modified``). All elements are created empty.
    """
    add_child = lxml.etree.SubElement

    root = lxml.etree.Element('iri')
    annotation = add_child(root, 'text-annotation')

    for tag_name in ('id', 'uri', 'tags'):
        add_child(annotation, tag_name)

    content_node = add_child(annotation, 'content')
    for tag_name in ('color', 'description', 'title', 'text'):
        add_child(content_node, tag_name)

    meta_node = add_child(annotation, 'meta')
    for tag_name in ("contributor", "creator", "created", "modified"):
        add_child(meta_node, tag_name)

    return lxml.etree.ElementTree(root)
|
|
67 |
|
|
21
|
68 |
|
|
|
69 |
class LdtSearch(object):
    """Run text queries against the Lucene index referenced by ``STORE``."""

    def query(self, field, query):
        """Search ``field`` of the index for ``query``.

        The query string is parsed with a French analyzer, using AND as the
        default operator between terms. At most
        ``settings.LDT_MAX_SEARCH_NUMBER`` hits are requested.

        Returns a list of dicts with the keys "external_id" and "title",
        one per hit, in the order of the search result's ``scoreDocs``.

        Any exception raised while parsing the query or searching
        propagates to the caller; the index searcher is closed first.
        """
        indexSearcher = lucene.IndexSearcher(STORE)
        try:
            queryParser = lucene.QueryParser(
                lucene.Version.LUCENE_30,
                field,
                lucene.FrenchAnalyzer(lucene.Version.LUCENE_30),
            )
            queryParser.setDefaultOperator(lucene.QueryParser.Operator.AND)
            queryObj = queryParser.parse(query)
            hits = indexSearcher.search(queryObj, settings.LDT_MAX_SEARCH_NUMBER)

            res = []
            for hit in hits.scoreDocs:
                doc = indexSearcher.doc(hit.doc)
                res.append({
                    "external_id": doc.get("external_id"),
                    "title": doc.get("title"),
                })
            return res
        finally:
            # Close the searcher even when parsing or searching fails, so a
            # malformed query does not leak the open index searcher.
            indexSearcher.close()

    def queryAll(self, query):
        """Search the "all" field for ``query``; see ``query`` for details."""
        return self.query("all", query)
|
|
87 |
|
|
|
88 |
|