'''
Created on 7 August 2012
@author: gerard
'''
import logging
import simplejson
import locale
from datetime import datetime
from django.core.cache import cache
from document.models import Annotationdocument
from document.models import Tag, Cluster
from mediapartdb.MediapartReader import MediapartReader
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
logger = logging.getLogger('document')
# List of documents of a cluster with annotations
class Documents(object):
    """Serialize the documents of one cluster, with annotations, to JSON.

    A cluster is read from the database when the query id is 0 and from the
    cache otherwise (see get_documents).
    """

    # Bounds substituted when the cached filter stores '' for a date limit.
    _NO_FROM_DATE = 0
    _NO_TO_DATE = 9999999999

    _AUTHOR_URL_PREFIX = 'http://www.mediapart.fr/biographie/'

    def __init__(self, request):
        """Keep the request whose session get_documents reads and updates."""
        self.request = request

    def get_documents(self, query, cluster, offset, count):
        """Return the cluster's documents as a JSON string.

        When ``query == 0`` the query id, cluster id, offset and count are
        read from the request through ClientDocumentsGetAttributes; otherwise
        the four arguments are used (each converted with ``int``).

        The resolved values are stored in the session under 'query',
        'cluster', 'offset' and 'count', and the query description is taken
        from the JSON text stored in ``session['json']``.

        A resolved query id of 0 selects a cluster saved in the database
        (every document is returned); any other id selects a cluster held in
        the cache, filtered by category and date and paged by offset/count.

        Returns a JSON object string on success, or a small JSON error
        object string when a parameter is missing or an id is unknown.
        """
        if query == 0:
            attr = ClientDocumentsGetAttributes(self.request)
            if not attr.get_cluster():
                return '{"error msg": "no cluster_id defined"}'
            if attr.get_offset() == '':
                return '{"error msg": "no offset defined"}'
            if attr.get_count() == '':
                return '{"error msg": "no count defined"}'
            query_id = int(attr.get_query_id())
            cluster_id = int(attr.get_cluster())
            offset = int(attr.get_offset())
            count = int(attr.get_count())
        else:
            query_id = int(query)
            cluster_id = int(cluster)
            offset = int(offset)
            count = int(count)

        session = self.request.session
        session['query'] = query_id
        session['cluster'] = cluster_id
        session['offset'] = offset
        session['count'] = count

        submitted = simplejson.loads(session['json'])['query']
        result = {
            'cluster_id': cluster_id,
            'offset': offset,
            'query': {
                'text': submitted['text'],
                'categories': submitted['categories'],
                'from_date': submitted['from_date'],
                'to_date': submitted['to_date'],
            },
            'documents': [],
        }

        if not query_id:
            error = self._fill_from_database(result, cluster_id)
        else:
            error = self._fill_from_cache(result, query_id, cluster_id,
                                          offset, count)
        if error is not None:
            return error
        return simplejson.dumps(result)

    def get_author(self, document_id):
        """Return what MediapartReader.get_author gives for document_id."""
        return MediapartReader().get_author(document_id)

    def get_contextual_data(self, query_id):
        """Return the cached data and filtering parameters of a query.

        The result is a dict with keys 'weblab_data' and 'filtering_params'.
        When nothing is cached under query_id, 'weblab_data' is None and the
        filtering parameters are neutral placeholders.
        """
        query_context = cache.get(query_id)
        if not query_context:
            # query_id is usually an int: format it instead of concatenating,
            # which raised TypeError before the fallback could be returned.
            logger.info("Error: Invalid query id:%s", query_id)
            return {
                'weblab_data': None,
                'filtering_params': {
                    'from_date': 0,
                    'to_date': 0,
                    'categories': [],
                },
            }
        return {
            'weblab_data': cache.get(query_context['weblab_data_key']),
            'filtering_params': query_context['filtering_params'],
        }

    def _fill_from_database(self, result, cluster_id):
        """Add the title and documents of a database cluster to result.

        Returns None on success or a JSON error string when no Cluster has
        cluster_id as primary key.
        """
        try:
            cluster = Cluster.objects.get(pk=cluster_id)
        except Cluster.DoesNotExist:
            error = '{"error": "Invalid cluster id"}'
            logger.info(error)
            return error

        result['cluster_title'] = cluster.title
        reader = MediapartReader()
        for stored in cluster.document.all():
            document_id = str(stored.documentId)
            # Text fields are passed through untouched: under Python 2,
            # str() on a unicode value with non-ASCII characters raises
            # UnicodeEncodeError, and the JSON encoder handles unicode itself.
            article = {
                'id': document_id,
                'title': stored.title,
                'abstract': stored.description,
                'url_document': reader.get_url(document_id),
            }
            # TODO
            article['url_image'] = stored.image.url
            article['date'] = self._format_timestamp(
                reader.get_date(document_id), '%d-%m-%Y')
            article['category'] = reader.get_category(document_id)
            article['weight'] = cluster.clusterdocumentweight_set.get(
                document=stored).weight
            # Tags known to mediapart first, then the ones stored locally.
            article['tags'] = self._mediapart_tags(reader, document_id)
            article['tags'].extend(
                {'title': tag.value} for tag in stored.tag_set.all())
            article['author'] = self._author_entries(document_id)
            article['annotations'] = self._public_annotations(
                stored.annotationdocument_set.all())
            result['documents'].append(article)

        # No paging in this branch: every document of the cluster is listed.
        result['total_count'] = len(result['documents'])
        return None

    def _fill_from_cache(self, result, query_id, cluster_id, offset, count):
        """Add the title and one page of documents of a cached cluster.

        Documents are filtered by category and date; 'total_count' is the
        number of documents passing the filters, of which only positions
        offset .. offset + count - 1 are serialized.

        Returns None on success or a JSON error string.
        """
        logger.info('query_id present')
        context = self.get_contextual_data(query_id)
        if not context['weblab_data']:
            # The original payload here was not parseable JSON.
            return '{"error": "Invalid query id"}'

        concepts, documents_by_concept = context['weblab_data']
        if not concepts:
            return '{"error msg": "no data for the query id"}'
        # A negative id would silently index from the end of the list.
        if cluster_id < 0 or cluster_id >= len(concepts):
            return '{"error msg": "invalid cluster id"}'

        filtering = context['filtering_params']
        categories = filtering['categories']
        from_date = filtering['from_date']
        if from_date == '':
            from_date = self._NO_FROM_DATE
        to_date = filtering['to_date']
        if to_date == '':
            to_date = self._NO_TO_DATE
        logger.debug('get_documents filters: categories=%s from_date=%s '
                     'to_date=%s', categories, from_date, to_date)
        from_ts = int(from_date)
        to_ts = int(to_date)

        result['cluster_title'] = concepts[cluster_id]['title']

        # Set once for the whole page so '%B' yields the month name of the
        # environment's locale; setlocale changes process-wide state.
        locale.setlocale(locale.LC_ALL, '')
        reader = MediapartReader()
        page_end = offset + count
        matched = 0
        all_public_annotations = None

        for document in documents_by_concept[cluster_id]:
            if not self._matches_filters(document, categories,
                                         from_ts, to_ts):
                continue
            position = matched
            matched += 1
            if position < offset or position >= page_end:
                continue

            article = {
                'id': document['id'],
                'title': document['title'],
                'abstract': document['abstract'],
                'url_document': document['url'],
            }
            # TODO
            article['url_image'] = document['image_path']
            article['date'] = self._format_timestamp(document['date'],
                                                     '%d %B %Y')
            article['category'] = document['category']
            article['weight'] = float(document['weight'])
            article['tags'] = self._mediapart_tags(reader,
                                                   str(document['id']))
            article['author'] = self._author_entries(document['id'])

            # NOTE(review): every public annotation in the database is
            # attached to every document here, whereas the database branch
            # only uses the document's own annotations. This looks
            # unintended -- confirm and filter by document if so.
            if all_public_annotations is None:
                all_public_annotations = self._public_annotations(
                    Annotationdocument.objects.all())
            article['annotations'] = list(all_public_annotations)
            result['documents'].append(article)

        result['total_count'] = matched
        return None

    def _author_entries(self, document_id):
        """Return the one-element author list of a document."""
        author = self.get_author(document_id)
        return [{
            'id': author['id'],
            'name': author['name'],
            'url': self._AUTHOR_URL_PREFIX + str(author['id']),
        }]

    @staticmethod
    def _mediapart_tags(reader, document_id):
        """Return reader's tags for document_id as {'title': ...} dicts.

        The first item of each tag row is re-encoded from windows-1252 to
        UTF-8.
        """
        return [
            {'title': row[0].decode("windows-1252").encode("utf8")}
            for row in reader.get_tags(document_id)
        ]

    @staticmethod
    def _public_annotations(annotations):
        """Serialize the annotations whose visibility is 1 (public)."""
        serialized = []
        for annotation in annotations:
            if annotation.visibility != 1:
                continue
            entry = {'id': annotation.id,
                     'user': annotation.user.username}
            # 'annotated_text' is present only when the annotation targets
            # a part of the article rather than the whole article.
            if annotation.annoted_text:
                entry['annotated_text'] = annotation.annoted_text
            entry['text'] = annotation.description
            # TO DO URL ?
            entry['tags'] = [{'title': tag.value}
                             for tag in annotation.tag_set.all()]
            serialized.append(entry)
        return serialized

    @staticmethod
    def _matches_filters(document, categories, from_ts, to_ts):
        """Tell whether document passes the category and date filters.

        An empty category list accepts every category. The date window is
        from_ts <= date < to_ts, with document['date'] converted by int().
        """
        if categories != [] and document['category'] not in categories:
            return False
        return from_ts <= int(document['date']) < to_ts

    @staticmethod
    def _format_timestamp(timestamp, pattern):
        """Format a Unix timestamp (int or numeric string) in local time."""
        return datetime.fromtimestamp(int(timestamp)).strftime(pattern)