'''
Created on 7 aout 2012
@author: gerard
'''
import logging
import simplejson
import locale
from datetime import datetime
import time
from django.core.cache import cache
from document.models import Annotationdocument
from document.models import Tag, Cluster
from mediapartdb.MediapartReader import MediapartReader
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
from document.models import Documentaryfile
logger = logging.getLogger('document')
# List of documents of a cluster with annotations
class Documents(object):
    """Builds the JSON description of the documents of one cluster.

    A cluster comes from one of two places:
      * a ``Documentaryfile`` saved in the database (``query == 0``), or
      * a query result kept in the cache / session (``query != 0``).
    ``get_documents`` serialises the requested page of documents (with their
    public annotations) to a JSON string in both cases.
    """

    def __init__(self, request):
        # Django HttpRequest; the session holds the 'jsonTreemap' of the
        # current query when the cluster comes from the cache.
        self.request = request

    def get_documents(self, query, cluster, offset, count, docId):
        """Return a JSON string with ``count`` documents of a cluster.

        query   -- query id; 0 means the cluster is stored in the database
        cluster -- index of the cluster inside the documentary file / query
        offset  -- index of the first document to return (pagination)
        count   -- number of documents per page
        docId   -- primary key of the Documentaryfile (used when query == 0)

        On error a plain JSON error string is returned instead of the
        document payload (same convention as the original call sites).
        """
        logger.info('get_documents query' + str(query))
        logger.info('get_documents cluster' + str(cluster))
        logger.info('get_documents offset' + str(offset))
        logger.info('get_documents docId' + str(docId))
        logger.info('get_documents count' + str(count))
        payload = {}
        query_id = int(query)
        cluster_id = int(cluster)
        offset = int(offset)
        count = int(count)
        # .get() instead of [] so a missing treemap does not abort the
        # database branch, which never needs it.
        logger.info(self.request.session.get('jsonTreemap'))
        payload['documents'] = []
        # Counts every matching document (not only the returned page) so it
        # can be reported as 'total_count' at the end.
        article_index = 0
        if query_id == 0:
            # --- Cluster saved in the database (documentary file) ---
            logger.info('docId != 0')
            try:
                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
            except Documentaryfile.DoesNotExist:
                logger.info('ERROR !!')
                # BUG FIX: the original built this error string but fell
                # through and crashed with NameError on the undefined
                # 'documentaryfile'; return the error instead.
                error = '{"Error": "Invalid documentary id"}'
                logger.info(error)
                return error
            thecluster = (documentaryfile.cluster_set.all())[cluster_id]
            logger.info('LONGUER !!' + str(thecluster.title))
            my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
            payload['query'] = {
                'text': my_jsontreemap['query']['text'],
                'categories': my_jsontreemap['query']['categories'],
                'from_date': my_jsontreemap['query']['from_date'],
                'to_date': my_jsontreemap['query']['to_date'],
            }
            payload['cluster_title'] = thecluster.title
            reader = MediapartReader()
            for thedocument in thecluster.document.all():
                article_index += 1
                # Pagination window [offset, offset + count)
                if offset <= article_index - 1 < offset + count:
                    doc_key = str(thedocument.documentId)
                    jsonarticle = {'id': thedocument.documentId}
                    jsonarticle['title'] = thedocument.title
                    jsonarticle['abstract'] = thedocument.description
                    jsonarticle['url_document'] = reader.get_url(doc_key)
                    # TODO: serve an absolute image URL
                    jsonarticle['url_image'] = thedocument.image.url
                    jsonarticle['date'] = datetime.fromtimestamp(
                        int(reader.get_date(doc_key))).strftime('%d-%m-%Y')
                    jsonarticle['category'] = reader.get_category(doc_key)
                    clusterDoc = thecluster.clusterdocumentweight_set.get(document=thedocument)
                    jsonarticle['weight'] = clusterDoc.weight
                    jsonarticle['tags'] = []
                    # Tags coming from Mediapart (windows-1252 encoded)
                    for tag in reader.get_tags(doc_key):
                        jsonarticle['tags'].append(
                            {'title': tag[0].decode("windows-1252").encode("utf8")})
                    # Tags added inside Periplus
                    for tag in thedocument.tag_set.all():
                        jsonarticle['tags'].append({'title': tag.value})
                    author = self.get_author(doc_key)
                    jsonarticle['author'] = [{
                        'id': author['id'],
                        'name': author['name'],
                        'url': 'http://www.mediapart.fr/biographie/' + str(author['id']),
                    }]
                    jsonarticle['annotations'] = []
                    for theannotationdoc in thedocument.annotationdocument_set.all():
                        # Only public annotations are exposed
                        if theannotationdoc.visibility == 1:
                            jsonannotation = {'id': theannotationdoc.id}
                            jsonannotation['user'] = theannotationdoc.user.username
                            # annoted_text is set when only a fragment of the
                            # article (not the whole article) is annotated
                            if theannotationdoc.annoted_text:
                                jsonannotation['annotated_text'] = theannotationdoc.annoted_text
                            jsonannotation['text'] = theannotationdoc.description
                            jsonannotation['tags'] = []
                            for theannotationdoctag in theannotationdoc.tag_set.all():
                                logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '
                                            + str(theannotationdoctag.value))
                                # NOTE: the original also built an {'id': ...}
                                # dict here and immediately discarded it.
                                jsonannotation['tags'].append(
                                    {'title': str(theannotationdoctag.value)})
                            jsonarticle['annotations'].append(jsonannotation)
                    payload['documents'].append(jsonarticle)
        else:
            # --- Cluster kept in the cache for the current query ---
            logger.info('query_id present' + str(query_id))
            d = simplejson.loads(self.request.session['jsonTreemap'])
            logger.info(d)
            payload['query'] = {
                'text': d['query']['text'],
                'categories': d['query']['categories'],
                'from_date': d['query']['from_date'],
                'to_date': d['query']['to_date'],
            }
            dico = self.get_contextual_data(query_id)
            logger.info('dico' + str(dico))
            if not dico['weblab_data']:
                return '{"Error: Invalid query id"}'
            list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
            filtering = dico['filtering_params']
            if not list_concepts:
                return '{"error msg": "no data for the query id"}'
            if cluster_id >= len(list_concepts):
                return '{"error msg": "invalid cluster id"}'
            categories = filtering['categories']
            logger.info(categories)
            # Filtering dates arrive as 'mm/dd/yyyy'; convert to epoch seconds
            # (documents carry epoch timestamps).
            time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y')
            from_date = str(int(time.mktime(time_object1)))
            logger.info('from_date=' + str(from_date))
            if from_date == '':
                from_date = 0
            time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y')
            to_date = str(int(time.mktime(time_object2)))
            if to_date == '':
                to_date = 9999999999
            payload['cluster_title'] = list_concepts[cluster_id]['title']
            reader = MediapartReader()
            for document in concepts_with_detailed_documents_list[cluster_id]:
                # Category filter: an empty list means "all categories"
                if categories and document['category'] not in categories:
                    continue
                # Date filter: keep documents in [from_date, to_date)
                if not (int(from_date) <= int(document['date']) < int(to_date)):
                    continue
                article_index += 1
                # Pagination window [offset, offset + count)
                if not (offset <= article_index - 1 < offset + count):
                    continue
                jsonarticle = {'id': document['id']}
                jsonarticle['title'] = document['title']
                jsonarticle['abstract'] = document['abstract']
                jsonarticle['url_document'] = document['url']
                # TODO: serve an absolute image URL
                jsonarticle['url_image'] = document['image_path']
                # Locale-dependent, human readable date (e.g. "7 aout 2012");
                # strftime output is windows-1252 here, re-encode to UTF-8.
                locale.setlocale(locale.LC_ALL, '')
                jsonarticle['date'] = (datetime.fromtimestamp(int(document['date']))
                                       .strftime('%d %B %Y')
                                       .decode("windows-1252").encode("utf8"))
                jsonarticle['category'] = document['category']
                jsonarticle['weight'] = float(document['weight'])
                jsonarticle['tags'] = []
                for tag in reader.get_tags(str(document['id'])):
                    jsonarticle['tags'].append(
                        {'title': tag[0].decode("windows-1252").encode("utf8")})
                author = self.get_author(document['id'])
                jsonarticle['author'] = [{
                    'id': author['id'],
                    'name': author['name'],
                    'url': 'http://www.mediapart.fr/biographie/' + str(author['id']),
                }]
                jsonarticle['annotations'] = []
                # FIXME(review): this scans EVERY annotation in the database,
                # not only those attached to the current document — looks like
                # a latent bug, kept as-is to preserve behaviour.
                for annotation in Annotationdocument.objects.all():
                    # Only public annotations are exposed
                    if annotation.visibility == 1:
                        jsonannotation = {'id': annotation.id}
                        jsonannotation['user'] = annotation.user.username
                        # annoted_text is set when only a fragment of the
                        # article is annotated
                        if annotation.annoted_text:
                            jsonannotation['annotated_text'] = annotation.annoted_text
                        jsonannotation['text'] = annotation.description
                        jsonannotation['tags'] = []
                        for tag in Tag.objects.filter(annotationdocument_id=annotation.id):
                            # NOTE: the original also built an {'id': ...}
                            # dict here and immediately discarded it.
                            jsonannotation['tags'].append({'title': str(tag.value)})
                        jsonarticle['annotations'].append(jsonannotation)
                payload['documents'].append(jsonarticle)
        payload['total_count'] = article_index
        logger.info('jsondocument' + str(payload))
        result = simplejson.dumps(payload)
        logger.info(result)
        return result

    def get_author(self, document_id):
        """Return the Mediapart author dict (keys 'id' and 'name') of a document."""
        reader = MediapartReader()
        return reader.get_author(document_id)

    def get_contextual_data(self, query_id):
        """Fetch the cached context of a query.

        Returns {'weblab_data': ..., 'filtering_params': ...}.  On a cache
        miss ``weblab_data`` is None and the filtering parameters fall back
        to "no filter" defaults.
        """
        query_context = cache.get(query_id)
        if not query_context:
            # BUG FIX: the original logged query_context['filtering_params']
            # BEFORE this check (TypeError on a cache miss) and then looked
            # up 'weblab_data_key' on the fallback dict (KeyError), clobbering
            # the weblab_data = None it had just set.
            logger.info("Error: Invalid query id:" + str(query_id))
            weblab_data = None
            query_context = {'filtering_params': {'from_date': 0,
                                                  'to_date': 0,
                                                  'categories': []}}
        else:
            logger.info('query_id ********** =' + str(query_context['filtering_params']))
            weblab_data = cache.get(query_context['weblab_data_key'])
            logger.info('query_context ********** =' + str(weblab_data))
        return {'weblab_data': weblab_data,
                'filtering_params': query_context['filtering_params']}