46 |
54 |
47 query_id = int(attr.get_query_id()) |
55 query_id = int(attr.get_query_id()) |
48 cluster_id = int(attr.get_cluster()) |
56 cluster_id = int(attr.get_cluster()) |
49 offset = int(attr.get_offset()) |
57 offset = int(attr.get_offset()) |
50 count=int(attr.get_count()) |
58 count=int(attr.get_count()) |
|
59 elif int(query) == 0 and int(docId) != 0: |
|
60 logger.info('ENTER2') |
|
61 try: |
|
62 documentaryfile = Documentaryfile.objects.get(pk=int(docId)) |
|
63 except Documentaryfile.DoesNotExist: |
|
64 logger.info('ERROR !!') |
|
65 json = '{"Error": "Invalid documentary id"}' |
|
66 logger.info(json) |
|
67 logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title)) |
|
68 #for thecluster in documentaryfile.cluster_set.all(): |
|
69 |
|
70 |
|
71 |
|
72 |
|
73 |
|
74 |
51 else: |
75 else: |
52 json['cluster_id'] = int(cluster) |
76 json['cluster_id'] = int(cluster) |
53 json['offset'] = int(offset) |
77 json['offset'] = int(offset)''' |
54 |
78 query_id = int(query) |
55 query_id = int(query) |
79 cluster_id = int(cluster) |
56 cluster_id = int(cluster) |
80 offset = int(offset) |
57 offset = int(offset) |
81 count=int(count) |
58 count=int(count) |
|
59 |
82 |
60 self.request.session['query'] = query_id |
83 logger.info(self.request.session['jsonTreemap']) |
61 self.request.session['cluster'] = cluster_id |
|
62 self.request.session['offset'] = offset |
|
63 self.request.session['count'] = count |
|
64 |
|
65 '''print self.request.session['json'] |
|
66 json_treemap = simplejson.loads(self.request.session['json']) |
|
67 print json_treemap.query''' |
|
68 |
|
69 d = simplejson.loads(self.request.session['json']) |
|
70 print d |
|
71 jsonquery = {'text': d['query']['text']} |
|
72 jsonquery['categories'] = d['query']['categories'] |
|
73 jsonquery['from_date'] = d['query']['from_date'] |
|
74 jsonquery['to_date'] = d['query']['to_date'] |
|
75 json['query'] = jsonquery |
|
76 |
|
77 json['documents'] = [] |
84 json['documents'] = [] |
78 article_index=0 |
85 article_index=0 |
79 #if no query_id it is a cluster saved in database |
86 #if docid != 0 it is a cluster saved in database |
80 if not query_id: |
87 if int(query) == 0: |
81 #json = '{"error msg": "query_id is not defined"}' |
88 logger.info('docId != 0') |
82 try: |
89 try: |
83 cluster = Cluster.objects.get(pk=cluster_id) |
90 documentaryfile = Documentaryfile.objects.get(pk=int(docId)) |
84 except Cluster.DoesNotExist: |
91 except Documentaryfile.DoesNotExist: |
85 json = '{"error": "Invalid cluster id"}' |
92 logger.info('ERROR !!') |
|
93 json = '{"Error": "Invalid documentary id"}' |
86 logger.info(json) |
94 logger.info(json) |
87 return json |
95 logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title)) |
88 json['cluster_title'] = cluster.title |
96 |
|
97 my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap) |
|
98 jsonquery = {'text': my_jsontreemap['query']['text']} |
|
99 jsonquery['categories'] = my_jsontreemap['query']['categories'] |
|
100 jsonquery['from_date'] = my_jsontreemap['query']['from_date'] |
|
101 jsonquery['to_date'] = my_jsontreemap['query']['to_date'] |
|
102 json['query'] = jsonquery |
|
103 json['cluster_title'] = (documentaryfile.cluster_set.all())[int(cluster)].title |
89 reader = MediapartReader() |
104 reader = MediapartReader() |
90 for thedocument in cluster.document.all(): |
105 for thedocument in (documentaryfile.cluster_set.all())[int(cluster)].document.all(): |
91 article_index += 1 |
106 article_index += 1 |
92 jsonarticle = {'id':str(thedocument.documentId)} |
107 if article_index - 1 >= offset and article_index - 1 < offset + count: |
93 jsonarticle['title'] = str(thedocument.title) |
108 jsonarticle = {'id':thedocument.documentId} |
94 jsonarticle['abstract'] = str(thedocument.description) |
109 jsonarticle['title'] = thedocument.title |
95 jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId)) |
110 jsonarticle['abstract'] = thedocument.description |
96 # TODO |
111 jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId)) |
97 jsonarticle['url_image'] = thedocument.image.url |
112 # TODO |
98 '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' |
113 jsonarticle['url_image'] = thedocument.image.url |
99 |
114 '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' |
100 jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y') |
115 |
101 |
116 jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y') |
102 |
117 jsonarticle['category'] = reader.get_category(str(thedocument.documentId)) |
103 jsonarticle['category'] = reader.get_category(str(thedocument.documentId)) |
118 |
104 |
119 clusterDoc = (documentaryfile.cluster_set.all())[int(cluster)].clusterdocumentweight_set.get(document=thedocument) |
105 clusterDoc = cluster.clusterdocumentweight_set.get(document=thedocument) |
120 jsonarticle['weight'] = clusterDoc.weight |
106 jsonarticle['weight'] = clusterDoc.weight |
121 tags = reader.get_tags(str(thedocument.documentId)) |
107 tags = reader.get_tags(str(thedocument.documentId)) |
122 jsonarticle['tags'] = [] |
108 jsonarticle['tags'] = [] |
123 #tags in mediapart |
109 #tags in mediapart |
124 for tag in tags: |
110 for tag in tags: |
125 jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")} |
111 jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")} |
126 jsonarticle['tags'].append(jsontag) |
112 jsonarticle['tags'].append(jsontag) |
127 |
113 |
128 #tags in periplus |
114 #tags in periplus |
129 tags = thedocument.tag_set.all() |
115 tags = thedocument.tag_set.all() |
130 for tag in tags: |
116 for tag in tags: |
131 jsontag = {'title':tag.value} |
117 jsontag = {'title':tag.value} |
132 jsonarticle['tags'].append(jsontag) |
118 jsonarticle['tags'].append(jsontag) |
133 |
119 |
134 author = self.get_author(str(thedocument.documentId)) |
120 author = self.get_author(str(thedocument.documentId)) |
135 jsonarticle['author'] = [] |
121 jsonarticle['author'] = [] |
136 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])} |
122 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])} |
137 jsonarticle['author'].append(jsonauthor) |
123 jsonarticle['author'].append(jsonauthor) |
138 |
124 |
139 json['documents'].append(jsonarticle) |
125 json['documents'].append(jsonarticle) |
140 jsonarticle['annotations'] = [] |
126 jsonarticle['annotations'] = [] |
141 |
127 |
142 for theannotationdoc in thedocument.annotationdocument_set.all(): |
128 for theannotationdoc in thedocument.annotationdocument_set.all(): |
143 #Take only the public annotations |
129 #Take only the public annotations |
144 if theannotationdoc.visibility == 1: |
130 if theannotationdoc.visibility == 1: |
145 jsonannotation = {'id':theannotationdoc.id} |
131 jsonannotation = {'id':theannotationdoc.id} |
146 jsonannotation['user'] = theannotationdoc.user.username |
132 jsonannotation['user'] = theannotationdoc.user.username |
147 # Test the scope of the annotation (a part of an article or the global article) |
133 # Test the scope of the annotation (a part of an article or the global article) |
148 if theannotationdoc.annoted_text: |
134 if theannotationdoc.annoted_text: |
149 jsonannotation['annotated_text'] = theannotationdoc.annoted_text |
135 jsonannotation['annotated_text'] = theannotationdoc.annoted_text |
150 jsonannotation['text'] = theannotationdoc.description |
136 jsonannotation['text'] = theannotationdoc.description |
151 |
137 |
152 jsonannotation['tags'] = [] |
138 jsonannotation['tags'] = [] |
153 for theannotationdoctag in theannotationdoc.tag_set.all(): |
139 for theannotationdoctag in theannotationdoc.tag_set.all(): |
154 logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value)) |
140 logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value)) |
155 jsontag = {'id': theannotationdoctag.value} |
141 jsontag = {'id': theannotationdoctag.value} |
156 jsontag = {'title':str(theannotationdoctag.value)} |
142 jsontag = {'title':str(theannotationdoctag.value)} |
157 #TO DO URL ? |
143 #TO DO URL ? |
158 jsonannotation['tags'].append(jsontag) |
144 jsonannotation['tags'].append(jsontag) |
159 |
145 |
160 jsonarticle['annotations'].append(jsonannotation) |
146 jsonarticle['annotations'].append(jsonannotation) |
|
147 |
161 |
148 #if query_id it is a cluster saved in cache |
162 #if query_id it is a cluster saved in cache |
149 else: |
163 else: |
150 logger.info('query_id present') |
164 logger.info('query_id present'+str(query_id)) |
|
165 d = simplejson.loads(self.request.session['jsonTreemap']) |
|
166 logger.info(d) |
|
167 jsonquery = {'text': d['query']['text']} |
|
168 jsonquery['categories'] = d['query']['categories'] |
|
169 jsonquery['from_date'] = d['query']['from_date'] |
|
170 jsonquery['to_date'] = d['query']['to_date'] |
|
171 json['query'] = jsonquery |
151 dico = self.get_contextual_data(query_id) |
172 dico = self.get_contextual_data(query_id) |
|
173 logger.info('dico'+str(dico)) |
152 if dico['weblab_data']: |
174 if dico['weblab_data']: |
153 list_concepts, concepts_with_detailed_documents_list = dico['weblab_data'] |
175 list_concepts, concepts_with_detailed_documents_list = dico['weblab_data'] |
154 filtering = dico['filtering_params'] |
176 filtering = dico['filtering_params'] |
155 if not list_concepts: |
177 if not list_concepts: |
156 json = '{"error msg": "no data for the query id"}' |
178 json = '{"error msg": "no data for the query id"}' |
157 return json |
179 return json |
158 if int(cluster_id) >= len(list_concepts): |
180 if int(cluster_id) >= len(list_concepts): |
159 json = '{"error msg": "invalid cluster id"}' |
181 json = '{"error msg": "invalid cluster id"}' |
160 return json |
182 return json |
161 categories = filtering['categories'] |
183 categories = filtering['categories'] |
162 print 'get_documents !!!!' |
184 logger.info('get_documents !!!!') |
163 print categories |
185 logger.info(categories) |
164 from_date = filtering['from_date'] |
186 |
165 print 'from_date' |
187 time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y') |
166 print from_date |
188 from_date = str(int(time.mktime(time_object1))) |
|
189 logger.info('get_documents 2!!!!'+str(from_date)) |
167 if from_date == '': |
190 if from_date == '': |
168 from_date = 0 |
191 from_date = 0 |
169 to_date = filtering['to_date'] |
192 |
170 print 'to_date' |
193 time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y') |
171 print to_date |
194 to_date = str(int(time.mktime(time_object2))) |
|
195 |
172 if to_date == '': |
196 if to_date == '': |
173 to_date = 9999999999 |
197 to_date = 9999999999 |
174 json['cluster_title'] = list_concepts[cluster_id]['title'] |
198 json['cluster_title'] = list_concepts[cluster_id]['title'] |
175 for document in concepts_with_detailed_documents_list[cluster_id]: |
199 for document in concepts_with_detailed_documents_list[cluster_id]: |
176 #Filtering by category |
200 #Filtering by category |
|
201 logger.info('categories) !!!!!!!!!!!!!!!!!!!!!!!!') |
|
202 logger.info(categories) |
|
203 logger.info('document[category] !!!!!!!!!!!!!!!!!!!!!!!!') |
|
204 logger.info(str(document['category'])) |
|
205 logger.info('document[date] !!!!!!!!!!!!!!!!!!!!!!!!') |
|
206 logger.info(str(document['date'])) |
|
207 logger.info('to_date !!!!!!!!!!!!!!!!!!!!!!!!') |
|
208 logger.info(str(to_date)) |
|
209 logger.info('from_date !!!!!!!!!!!!!!!!!!!!!!!!') |
|
210 logger.info(str(from_date)) |
|
211 |
|
212 |
177 if (categories != [] and document['category'] in categories) or (categories == []): |
213 if (categories != [] and document['category'] in categories) or (categories == []): |
178 #Filtering by date |
214 #Filtering by date |
179 if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date): |
215 if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date): |
|
216 logger.info('ENTER') |
180 article_index += 1 |
217 article_index += 1 |
181 #Filtering by offset |
218 #Filtering by offset |
182 if article_index - 1 >= offset and article_index - 1 < offset + count: |
219 if article_index - 1 >= offset and article_index - 1 < offset + count: |
|
220 logger.info('ENTER2') |
183 jsonarticle = {'id':document['id']} |
221 jsonarticle = {'id':document['id']} |
184 jsonarticle['title'] = document['title'] |
222 jsonarticle['title'] = document['title'] |
185 jsonarticle['abstract'] = document['abstract'] |
223 jsonarticle['abstract'] = document['abstract'] |
186 jsonarticle['url_document'] = document['url'] |
224 jsonarticle['url_document'] = document['url'] |
|
225 logger.info('ENTER3') |
187 # TODO |
226 # TODO |
188 jsonarticle['url_image'] = document['image_path'] |
227 jsonarticle['url_image'] = document['image_path'] |
189 # |
228 # |
190 '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z''' |
229 '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z''' |
191 locale.setlocale(locale.LC_ALL,'') |
230 locale.setlocale(locale.LC_ALL,'') |
192 jsonarticle['date'] =(datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y') |
231 jsonarticle['date'] = ((datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')).decode("windows-1252").encode("utf8") |
193 jsonarticle['category'] = document['category'] |
232 jsonarticle['category'] = document['category'] |
194 jsonarticle['weight'] = float(document['weight']) |
233 jsonarticle['weight'] = float(document['weight']) |
195 reader = MediapartReader() |
234 reader = MediapartReader() |
196 tags = reader.get_tags(str(document['id'])) |
235 tags = reader.get_tags(str(document['id'])) |
|
236 logger.info('ENTER4') |
197 jsonarticle['tags'] = [] |
237 jsonarticle['tags'] = [] |
|
238 logger.info('ENTER5') |
198 for tag in tags: |
239 for tag in tags: |
|
240 logger.info('ENTER6') |
199 jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")} |
241 jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")} |
200 jsonarticle['tags'].append(jsontag) |
242 jsonarticle['tags'].append(jsontag) |
|
243 logger.info('ENTER5') |
201 author = self.get_author(document['id']) |
244 author = self.get_author(document['id']) |
202 print document['id'] |
245 logger.info('ENTER5') |
203 jsonarticle['author'] = [] |
246 jsonarticle['author'] = [] |
|
247 logger.info('ENTER5') |
204 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])} |
248 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])} |
|
249 logger.info('ENTER5') |
205 jsonarticle['author'].append(jsonauthor) |
250 jsonarticle['author'].append(jsonauthor) |
206 |
251 logger.info('ENTER5') |
207 json['documents'].append(jsonarticle) |
252 json['documents'].append(jsonarticle) |
208 jsonarticle['annotations'] = [] |
253 jsonarticle['annotations'] = [] |
209 |
254 logger.info('jsonarticle') |
210 annotations = Annotationdocument.objects.all() |
255 annotations = Annotationdocument.objects.all() |
211 for annotation in annotations: |
256 for annotation in annotations: |
212 #Take only the public annotations |
257 #Take only the public annotations |
213 if annotation.visibility == 1: |
258 if annotation.visibility == 1: |
214 jsonannotation = {'id':annotation.id} |
259 jsonannotation = {'id':annotation.id} |