|
27
|
1 |
''' |
|
|
2 |
Created on 7 aout 2012 |
|
|
3 |
|
|
|
4 |
@author: gerard |
|
|
5 |
''' |
|
|
6 |
import logging |
|
|
7 |
import simplejson |
|
|
8 |
import locale |
|
|
9 |
from datetime import datetime |
|
37
|
10 |
import time |
|
27
|
11 |
|
|
|
12 |
from django.core.cache import cache |
|
|
13 |
from document.models import Annotationdocument |
|
|
14 |
from document.models import Tag, Cluster |
|
|
15 |
from mediapartdb.MediapartReader import MediapartReader |
|
|
16 |
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes |
|
37
|
17 |
from document.models import Documentaryfile |
|
27
|
18 |
logger = logging.getLogger('document') |
|
|
19 |
|
|
|
20 |
# List of documents of a cluster with annotations |
|
|
21 |
class Documents(object):
    """Build the JSON listing of the documents (with annotations) of one cluster.

    A cluster comes either from a ``Documentaryfile`` saved in the database
    (``query == 0``, identified by ``docId``) or from weblab data cached
    under ``query_id``.
    """

    def __init__(self, request):
        # Keep the Django request: get_documents() reads
        # request.session['jsonTreemap'] later.
        self.request = request

    def get_documents(self, query, cluster, offset, count, docId):
        """Return a JSON string describing one cluster's documents.

        query   -- query id; 0 means "load from a saved Documentaryfile"
        cluster -- index of the cluster inside the documentary file /
                   cached concept list
        offset  -- index of the first document to return (paging)
        count   -- maximum number of documents to return (paging)
        docId   -- primary key of the Documentaryfile (used when query == 0)

        On error a plain JSON error string is returned instead of the
        document listing (original contract, kept as-is).
        """
        logger.info('get_documents query'+str(query))
        logger.info('get_documents cluster'+str(cluster))
        logger.info('get_documents offset'+str(offset))
        logger.info('get_documents docId'+str(docId))
        logger.info('get_documents count'+str(count))

        json = {}

        query_id = int(query)
        cluster_id = int(cluster)
        offset = int(offset)
        count = int(count)

        # NOTE(review): raises KeyError when 'jsonTreemap' is missing from
        # the session -- presumably always set by the treemap view; confirm.
        logger.info(self.request.session['jsonTreemap'])
        json['documents'] = []
        article_index = 0

        # query == 0: the cluster was saved in the database.
        if int(query) == 0:
            logger.info('docId != 0')
            try:
                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
            except Documentaryfile.DoesNotExist:
                logger.info('ERROR !!')
                json = '{"Error": "Invalid documentary id"}'
                logger.info(json)
                # BUGFIX: the original fell through here and crashed with a
                # NameError on the unbound 'documentaryfile' below.
                return json

            logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))

            # Echo the query parameters stored with the saved treemap.
            my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
            jsonquery = {'text': my_jsontreemap['query']['text']}
            jsonquery['categories'] = my_jsontreemap['query']['categories']
            jsonquery['from_date'] = my_jsontreemap['query']['from_date']
            jsonquery['to_date'] = my_jsontreemap['query']['to_date']
            json['query'] = jsonquery
            json['cluster_title'] = (documentaryfile.cluster_set.all())[int(cluster)].title

            reader = MediapartReader()
            for thedocument in (documentaryfile.cluster_set.all())[int(cluster)].document.all():
                article_index += 1
                # Paging: keep only documents in [offset, offset + count).
                if article_index - 1 >= offset and article_index - 1 < offset + count:
                    jsonarticle = {'id':thedocument.documentId}
                    jsonarticle['title'] = thedocument.title
                    jsonarticle['abstract'] = thedocument.description
                    jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
                    # TODO: real image URL handling
                    jsonarticle['url_image'] = thedocument.image.url
                    jsonarticle['date'] = (datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
                    jsonarticle['category'] = reader.get_category(str(thedocument.documentId))

                    clusterDoc = (documentaryfile.cluster_set.all())[int(cluster)].clusterdocumentweight_set.get(document=thedocument)
                    jsonarticle['weight'] = clusterDoc.weight

                    # Tags coming from Mediapart (windows-1252 bytes).
                    tags = reader.get_tags(str(thedocument.documentId))
                    jsonarticle['tags'] = []
                    for tag in tags:
                        jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
                        jsonarticle['tags'].append(jsontag)

                    # Tags created inside Periplus.
                    tags = thedocument.tag_set.all()
                    for tag in tags:
                        jsontag = {'title':tag.value}
                        jsonarticle['tags'].append(jsontag)

                    author = self.get_author(str(thedocument.documentId))
                    jsonarticle['author'] = []
                    jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
                    jsonarticle['author'].append(jsonauthor)

                    json['documents'].append(jsonarticle)

                    # Public annotations attached to this document.
                    jsonarticle['annotations'] = []
                    for theannotationdoc in thedocument.annotationdocument_set.all():
                        # Take only the public annotations
                        if theannotationdoc.visibility == 1:
                            jsonannotation = {'id':theannotationdoc.id}
                            jsonannotation['user'] = theannotationdoc.user.username
                            # Scope of the annotation: a text excerpt or the
                            # whole article.
                            if theannotationdoc.annoted_text:
                                jsonannotation['annotated_text'] = theannotationdoc.annoted_text
                            jsonannotation['text'] = theannotationdoc.description

                            jsonannotation['tags'] = []
                            for theannotationdoctag in theannotationdoc.tag_set.all():
                                logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
                                # BUGFIX: the original built an {'id': ...}
                                # dict and immediately overwrote it (dead
                                # store); only the 'title' dict was ever used.
                                jsontag = {'title':str(theannotationdoctag.value)}
                                #TO DO URL ?
                                jsonannotation['tags'].append(jsontag)

                            jsonarticle['annotations'].append(jsonannotation)

        # query != 0: the cluster comes from the weblab data cached for this
        # query id.
        else:
            logger.info('query_id present'+str(query_id))
            d = simplejson.loads(self.request.session['jsonTreemap'])
            logger.info(d)
            jsonquery = {'text': d['query']['text']}
            jsonquery['categories'] = d['query']['categories']
            jsonquery['from_date'] = d['query']['from_date']
            jsonquery['to_date'] = d['query']['to_date']
            json['query'] = jsonquery

            dico = self.get_contextual_data(query_id)
            logger.info('dico'+str(dico))
            if dico['weblab_data']:
                list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
                filtering = dico['filtering_params']
                if not list_concepts:
                    json = '{"error msg": "no data for the query id"}'
                    return json
                if int(cluster_id) >= len(list_concepts):
                    json = '{"error msg": "invalid cluster id"}'
                    return json

                categories = filtering['categories']
                logger.info(categories)

                # Date bounds arrive as 'mm/dd/yyyy' strings and are turned
                # into Unix timestamps for comparison.
                time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y')
                from_date = str(int(time.mktime(time_object1)))
                logger.info('get_documents 2!!!!'+str(from_date))
                if from_date == '':
                    from_date = 0

                time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y')
                to_date = str(int(time.mktime(time_object2)))
                if to_date == '':
                    to_date = 9999999999

                json['cluster_title'] = list_concepts[cluster_id]['title']
                for document in concepts_with_detailed_documents_list[cluster_id]:
                    # Filtering by category (an empty list means "no filter").
                    if (categories != [] and document['category'] in categories) or (categories == []):
                        # Filtering by date
                        if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
                            article_index += 1
                            # Filtering by offset (paging window).
                            if article_index - 1 >= offset and article_index - 1 < offset + count:
                                jsonarticle = {'id':document['id']}
                                jsonarticle['title'] = document['title']
                                jsonarticle['abstract'] = document['abstract']
                                jsonarticle['url_document'] = document['url']
                                # TODO: real image URL handling
                                jsonarticle['url_image'] = document['image_path']
                                # Locale-aware, human-readable date.
                                locale.setlocale(locale.LC_ALL,'')
                                jsonarticle['date'] = ((datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')).decode("windows-1252").encode("utf8")
                                jsonarticle['category'] = document['category']
                                jsonarticle['weight'] = float(document['weight'])

                                # Tags coming from Mediapart (windows-1252 bytes).
                                reader = MediapartReader()
                                tags = reader.get_tags(str(document['id']))
                                jsonarticle['tags'] = []
                                for tag in tags:
                                    jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
                                    jsonarticle['tags'].append(jsontag)

                                author = self.get_author(document['id'])
                                jsonarticle['author'] = []
                                jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
                                jsonarticle['author'].append(jsonauthor)

                                json['documents'].append(jsonarticle)

                                # NOTE(review): this scans *all* annotation
                                # documents, not only those attached to this
                                # article -- looks suspicious but behavior is
                                # kept as-is; confirm against the data model.
                                jsonarticle['annotations'] = []
                                annotations = Annotationdocument.objects.all()
                                for annotation in annotations:
                                    # Take only the public annotations
                                    if annotation.visibility == 1:
                                        jsonannotation = {'id':annotation.id}
                                        jsonannotation['user'] = annotation.user.username
                                        # Scope of the annotation: a text
                                        # excerpt or the whole article.
                                        if annotation.annoted_text:
                                            jsonannotation['annotated_text'] = annotation.annoted_text
                                        jsonannotation['text'] = annotation.description

                                        jsonannotation['tags'] = []
                                        tags = Tag.objects.filter(annotationdocument_id=annotation.id)
                                        for tag in tags:
                                            # BUGFIX: dead {'id': ...} store
                                            # removed (see the other loop).
                                            jsontag = {'title':str(tag.value)}
                                            #TO DO URL ?
                                            jsonannotation['tags'].append(jsontag)

                                        jsonarticle['annotations'].append(jsonannotation)
            else:
                json = '{"Error: Invalid query id"}'
                return json

        json['total_count'] = article_index
        logger.info('jsondocument'+str(json))
        result = simplejson.dumps(json)
        logger.info('result')
        logger.info(result)
        return result

    def get_author(self, document_id):
        """Return the author dict for a Mediapart document.

        The dict is whatever MediapartReader.get_author() yields; callers
        read its 'id' and 'name' keys.
        """
        reader = MediapartReader()
        dico = reader.get_author(document_id)
        return dico

    def get_contextual_data(self, query_id):
        """Return the cached weblab data and filtering params for a query id.

        Returns {'weblab_data': ..., 'filtering_params': ...}.  On a cache
        miss, 'weblab_data' is None and default (empty) filtering params are
        supplied.
        """
        query_context = cache.get(query_id)
        if not query_context:
            # BUGFIX: the original logged query_context['filtering_params']
            # *before* this guard (TypeError on a cache miss), concatenated
            # an int query_id to a str (TypeError), and then unconditionally
            # read 'weblab_data_key' from the fallback dict (KeyError).
            logger.info("Error: Invalid query id:"+str(query_id))
            weblab_data = None
            query_context = {'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
        else:
            logger.info('query_id ********** ='+str(query_context['filtering_params']))
            weblab_data = cache.get(query_context['weblab_data_key'])
            logger.info('query_context ********** ='+str(weblab_data))

        return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}