--- a/alcatel/controller/Documents.py Mon Jul 22 14:56:35 2013 +0200
+++ b/alcatel/controller/Documents.py Wed Aug 14 16:36:41 2013 +0200
@@ -7,13 +7,14 @@
import simplejson
import locale
from datetime import datetime
+import time
from django.core.cache import cache
from document.models import Annotationdocument
from document.models import Tag, Cluster
from mediapartdb.MediapartReader import MediapartReader
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
-
+from document.models import Documentaryfile
+
logger = logging.getLogger('document')
# List of documents of a cluster with annotations
@@ -22,10 +23,17 @@
def __init__(self, request):
self.request = request
- def get_documents(self,query,cluster,offset,count):
+    def get_documents(self, query, cluster, offset, count, docId):
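+        # docId != 0 selects a documentary file saved in the database;
+        # 'cluster' is then a positional index into its cluster_set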
+        logger.info('get_documents query='+str(query)+' cluster='+str(cluster)+' offset='+str(offset)+' count='+str(count)+' docId='+str(docId))
+
json = {}
- if query == 0:
+ '''if int(query) == 0 and int(docId) == 0:
+ logger.info('ENTER1')
attr = ClientDocumentsGetAttributes(self.request)
if not attr.get_cluster():
@@ -48,107 +56,121 @@
cluster_id = int(attr.get_cluster())
offset = int(attr.get_offset())
count=int(attr.get_count())
+ elif int(query) == 0 and int(docId) != 0:
+ logger.info('ENTER2')
+ try:
+ documentaryfile = Documentaryfile.objects.get(pk=int(docId))
+ except Documentaryfile.DoesNotExist:
+ logger.info('ERROR !!')
+ json = '{"Error": "Invalid documentary id"}'
+ logger.info(json)
else:
json['cluster_id'] = int(cluster)
- json['offset'] = int(offset)
-
- query_id = int(query)
- cluster_id = int(cluster)
- offset = int(offset)
- count=int(count)
+ json['offset'] = int(offset)'''
+ query_id = int(query)
+ cluster_id = int(cluster)
+ offset = int(offset)
+        count = int(count)
- self.request.session['query'] = query_id
- self.request.session['cluster'] = cluster_id
- self.request.session['offset'] = offset
- self.request.session['count'] = count
-
- '''print self.request.session['json']
- json_treemap = simplejson.loads(self.request.session['json'])
- print json_treemap.query'''
-
- d = simplejson.loads(self.request.session['json'])
- print d
- jsonquery = {'text': d['query']['text']}
- jsonquery['categories'] = d['query']['categories']
- jsonquery['from_date'] = d['query']['from_date']
- jsonquery['to_date'] = d['query']['to_date']
- json['query'] = jsonquery
-
+        logger.info('jsonTreemap='+str(self.request.session.get('jsonTreemap')))
json['documents'] = []
article_index=0
- #if no query_id it is a cluster saved in database
- if not query_id:
- #json = '{"error msg": "query_id is not defined"}'
+        #if query == 0 the cluster comes from a documentary file saved in the database
+        if int(query) == 0:
+            logger.info('query == 0: loading documentary file '+str(docId))
try:
- cluster = Cluster.objects.get(pk=cluster_id)
- except Cluster.DoesNotExist:
- json = '{"error": "Invalid cluster id"}'
+                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
+            except Documentaryfile.DoesNotExist:
+                json = '{"Error": "Invalid documentary id"}'
                logger.info(json)
-            return json
-            json['cluster_title'] = cluster.title
+                return json
+            # The requested cluster, addressed by its position in the documentary file
+            thecluster = documentaryfile.cluster_set.all()[int(cluster)]
+            logger.info('cluster title='+str(thecluster.title))
+
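+            # Rebuild the query block from the treemap JSON saved with the documentary file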
+ my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
+ jsonquery = {'text': my_jsontreemap['query']['text']}
+ jsonquery['categories'] = my_jsontreemap['query']['categories']
+ jsonquery['from_date'] = my_jsontreemap['query']['from_date']
+ jsonquery['to_date'] = my_jsontreemap['query']['to_date']
+ json['query'] = jsonquery
+            json['cluster_title'] = thecluster.title
reader = MediapartReader()
- for thedocument in cluster.document.all():
+            for thedocument in thecluster.document.all():
article_index += 1
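+                # Keep only the documents in the requested window [offset, offset + count)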
- jsonarticle = {'id':str(thedocument.documentId)}
- jsonarticle['title'] = str(thedocument.title)
- jsonarticle['abstract'] = str(thedocument.description)
- jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
- # TODO
- jsonarticle['url_image'] = thedocument.image.url
- '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z'''
-
- jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
-
-
- jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
+                    if offset <= article_index - 1 < offset + count:
+ jsonarticle = {'id':thedocument.documentId}
+ jsonarticle['title'] = thedocument.title
+ jsonarticle['abstract'] = thedocument.description
+ jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
+ # TODO
+ jsonarticle['url_image'] = thedocument.image.url
+                        # jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z'
+                        jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).strftime('%d-%m-%Y')
+ jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
- clusterDoc = cluster.clusterdocumentweight_set.get(document=thedocument)
- jsonarticle['weight'] = clusterDoc.weight
- tags = reader.get_tags(str(thedocument.documentId))
- jsonarticle['tags'] = []
- #tags in mediapart
- for tag in tags:
- jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
- jsonarticle['tags'].append(jsontag)
-
- #tags in periplus
- tags = thedocument.tag_set.all()
- for tag in tags:
- jsontag = {'title':tag.value}
- jsonarticle['tags'].append(jsontag)
+                        clusterDoc = thecluster.clusterdocumentweight_set.get(document=thedocument)
+ jsonarticle['weight'] = clusterDoc.weight
+ tags = reader.get_tags(str(thedocument.documentId))
+ jsonarticle['tags'] = []
+ #tags in mediapart
+ for tag in tags:
+ jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
+ jsonarticle['tags'].append(jsontag)
+
+ #tags in periplus
+ tags = thedocument.tag_set.all()
+ for tag in tags:
+ jsontag = {'title':tag.value}
+ jsonarticle['tags'].append(jsontag)
+
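+                    # Resolve the author and link to their Mediapart biography page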
+ author = self.get_author(str(thedocument.documentId))
+ jsonarticle['author'] = []
+ jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+ jsonarticle['author'].append(jsonauthor)
- author = self.get_author(str(thedocument.documentId))
- jsonarticle['author'] = []
- jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
- jsonarticle['author'].append(jsonauthor)
-
- json['documents'].append(jsonarticle)
- jsonarticle['annotations'] = []
-
- for theannotationdoc in thedocument.annotationdocument_set.all():
- #Take only the public annotations
- if theannotationdoc.visibility == 1:
- jsonannotation = {'id':theannotationdoc.id}
- jsonannotation['user'] = theannotationdoc.user.username
- # Test the scope of the annotation (a part of an article or the global article)
- if theannotationdoc.annoted_text:
- jsonannotation['annotated_text'] = theannotationdoc.annoted_text
- jsonannotation['text'] = theannotationdoc.description
-
- jsonannotation['tags'] = []
- for theannotationdoctag in theannotationdoc.tag_set.all():
- logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
- jsontag = {'id': theannotationdoctag.value}
- jsontag = {'title':str(theannotationdoctag.value)}
- #TO DO URL ?
- jsonannotation['tags'].append(jsontag)
-
- jsonarticle['annotations'].append(jsonannotation)
+ json['documents'].append(jsonarticle)
+ jsonarticle['annotations'] = []
+
+ for theannotationdoc in thedocument.annotationdocument_set.all():
+ #Take only the public annotations
+ if theannotationdoc.visibility == 1:
+ jsonannotation = {'id':theannotationdoc.id}
+ jsonannotation['user'] = theannotationdoc.user.username
+ # Test the scope of the annotation (a part of an article or the global article)
+ if theannotationdoc.annoted_text:
+ jsonannotation['annotated_text'] = theannotationdoc.annoted_text
+ jsonannotation['text'] = theannotationdoc.description
+
+ jsonannotation['tags'] = []
+ for theannotationdoctag in theannotationdoc.tag_set.all():
+ logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
+                        jsontag = {'title': str(theannotationdoctag.value)}
+                        # TODO: also expose a URL for the tag?
+ jsonannotation['tags'].append(jsontag)
+
+ jsonarticle['annotations'].append(jsonannotation)
#if query_id it is a cluster saved in cache
else:
-            logger.info('query_id present')
+            logger.info('query_id present: '+str(query_id))
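+            # Live query: the session's treemap JSON holds the original query parameters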
+ d = simplejson.loads(self.request.session['jsonTreemap'])
+ logger.info(d)
+ jsonquery = {'text': d['query']['text']}
+ jsonquery['categories'] = d['query']['categories']
+ jsonquery['from_date'] = d['query']['from_date']
+ jsonquery['to_date'] = d['query']['to_date']
+ json['query'] = jsonquery
dico = self.get_contextual_data(query_id)
+            logger.info('dico='+str(dico))
if dico['weblab_data']:
list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
filtering = dico['filtering_params']
@@ -159,54 +181,77 @@
json = '{"error msg": "invalid cluster id"}'
return json
categories = filtering['categories']
- print 'get_documents !!!!'
- print categories
- from_date = filtering['from_date']
- print 'from_date'
- print from_date
+            logger.info('get_documents categories='+str(categories))
+
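+            # Dates arrive as 'MM/DD/YYYY'; convert to Unix timestamps, with open bounds when empty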
+            if not filtering['from_date']:
+                from_date = 0
+            else:
+                time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y')
+                from_date = int(time.mktime(time_object1))
+            logger.info('get_documents from_date='+str(from_date))
-            if from_date == '':
-                from_date = 0
- to_date = filtering['to_date']
- print 'to_date'
- print to_date
+
+            if not filtering['to_date']:
+                to_date = 9999999999
+            else:
+                time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y')
+                to_date = int(time.mktime(time_object2))
-            if to_date == '':
-                to_date = 9999999999
json['cluster_title'] = list_concepts[cluster_id]['title']
for document in concepts_with_detailed_documents_list[cluster_id]:
#Filtering by category
+                logger.info('filter: categories='+str(categories)+' from_date='+str(from_date)+' to_date='+str(to_date))
+                logger.info('document: category='+str(document['category'])+' date='+str(document['date']))
if (categories != [] and document['category'] in categories) or (categories == []):
#Filtering by date
if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
article_index += 1
#Filtering by offset
if article_index - 1 >= offset and article_index - 1 < offset + count:
jsonarticle = {'id':document['id']}
jsonarticle['title'] = document['title']
jsonarticle['abstract'] = document['abstract']
jsonarticle['url_document'] = document['url']
# TODO
jsonarticle['url_image'] = document['image_path']
#
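+                            # %B renders the month name in the current locale; the code assumes
+                            # windows-1252 output and re-encodes it to UTF-8 for the JSON response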
-                            '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z'''
+                            # jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z'
                            locale.setlocale(locale.LC_ALL,'')
-                            jsonarticle['date'] =(datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')
+                            jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).strftime('%d %B %Y').decode("windows-1252").encode("utf8")
jsonarticle['category'] = document['category']
jsonarticle['weight'] = float(document['weight'])
reader = MediapartReader()
tags = reader.get_tags(str(document['id']))
                            jsonarticle['tags'] = []
                            for tag in tags:
                                jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
                                jsonarticle['tags'].append(jsontag)
                            author = self.get_author(document['id'])
-                            print document['id']
+                            logger.info('document id='+str(document['id']))
                            jsonarticle['author'] = []
                            jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
                            jsonarticle['author'].append(jsonauthor)
-
json['documents'].append(jsonarticle)
jsonarticle['annotations'] = []
-
annotations = Annotationdocument.objects.all()
for annotation in annotations:
#Take only the public annotations
@@ -231,8 +276,12 @@
         else:
-            json = '{"Error: Invalid query id"}'
+            json = '{"Error": "Invalid query id"}'
             return json
         json['total_count'] = article_index
+        logger.info('json='+str(json))
         result = simplejson.dumps(json)
+        logger.info('result='+result)
return result
def get_author(self, document_id):
@@ -242,12 +291,19 @@
def get_contextual_data(self, query_id):
query_context = cache.get(query_id)
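+        # Expected cache layout: query_id -> {'weblab_data_key': ..., 'filtering_params': {'from_date', 'to_date', 'categories'}}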
+        # query_context may be None on a cache miss, so log it whole instead of subscripting it
+        logger.info('query_context='+str(query_context))
if not query_context:
- print "Error: Invalid query id:"+query_id
+ logger.info("Error: Invalid query id:"+query_id)
logger.info("Error: Invalid query id:"+query_id)
weblab_data=None
query_context ={'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
         else:
             weblab_data = cache.get(query_context['weblab_data_key'])
-
+            logger.info('weblab_data='+str(weblab_data))
+
return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}
\ No newline at end of file