--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/alcatel/controller/Documents.py Thu Jan 24 16:58:55 2013 +0100
@@ -0,0 +1,283 @@
+'''
+Created on 7 August 2012
+
+@author: gerard
+'''
+import logging
+import simplejson
+import locale
+from datetime import datetime
+
+from django.core.cache import cache
+from document.models import Annotationdocument
+from document.models import Tag, Cluster
+from mediapartdb.MediapartReader import MediapartReader
+from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
+
+logger = logging.getLogger('document')
+
+
+class Documents(object):
+    '''List the documents of a cluster, with their annotations, as JSON.
+
+    The cluster comes from the database when the query id is 0 and from
+    the cache otherwise.
+    '''
+
+    # Prefix of an author's biography URL; the author id is appended to it.
+    AUTHOR_URL_PREFIX = 'http://www.mediapart.fr/biographie/'
+
+    def __init__(self, request):
+        '''Keep the request whose session get_documents reads and updates.'''
+        self.request = request
+
+    def get_documents(self, query, cluster, offset, count):
+        '''Return the documents of a cluster as a JSON string.
+
+        When query equals 0, the query id, cluster id, offset and count are
+        read from the request through ClientDocumentsGetAttributes;
+        otherwise the arguments are used. The four values are stored in the
+        session either way.
+
+        A query id of 0 selects a cluster saved in the database (offset and
+        count are not applied to it). Any other query id selects a cluster
+        held in the cache, filtered by the categories and dates cached with
+        the query, then paginated with offset and count.
+
+        On success the JSON object has the keys cluster_id, offset, query,
+        cluster_title, documents and total_count (the number of documents
+        before pagination). On failure it describes the error.
+        '''
+        if query == 0:
+            attr = ClientDocumentsGetAttributes(self.request)
+            if not attr.get_cluster():
+                return '{"error msg": "no cluster_id defined"}'
+            if attr.get_offset() == '':
+                return '{"error msg": "no offset defined"}'
+            if attr.get_count() == '':
+                return '{"error msg": "no count defined"}'
+            query = attr.get_query_id()
+            cluster = attr.get_cluster()
+            offset = attr.get_offset()
+            count = attr.get_count()
+
+        query_id = int(query)
+        cluster_id = int(cluster)
+        offset = int(offset)
+        count = int(count)
+
+        session = self.request.session
+        session['query'] = query_id
+        session['cluster'] = cluster_id
+        session['offset'] = offset
+        session['count'] = count
+
+        # The session holds a JSON text whose 'query' entry describes the query.
+        stored_query = simplejson.loads(session['json'])['query']
+        response = {
+            'cluster_id': cluster_id,
+            'offset': offset,
+            'query': {
+                'text': stored_query['text'],
+                'categories': stored_query['categories'],
+                'from_date': stored_query['from_date'],
+                'to_date': stored_query['to_date'],
+            },
+            'documents': [],
+        }
+
+        if not query_id:
+            # No query id: the cluster was saved in the database.
+            error = self._add_saved_cluster(response, cluster_id)
+        else:
+            # A query id: the cluster is in the cache.
+            logger.info('query_id present')
+            error = self._add_cached_cluster(response, query_id, cluster_id,
+                                             offset, count)
+        if error is not None:
+            return error
+        return simplejson.dumps(response)
+
+    def _add_saved_cluster(self, response, cluster_id):
+        '''Add to response the documents of a cluster saved in the database.
+
+        Returns None on success and a JSON error string otherwise.
+        '''
+        try:
+            cluster = Cluster.objects.get(pk=cluster_id)
+        except Cluster.DoesNotExist:
+            error = '{"error": "Invalid cluster id"}'
+            logger.info(error)
+            return error
+
+        response['cluster_title'] = cluster.title
+        reader = MediapartReader()
+        for thedocument in cluster.document.all():
+            document_id = str(thedocument.documentId)
+            timestamp = int(reader.get_date(document_id))
+            weight = cluster.clusterdocumentweight_set.get(
+                document=thedocument).weight
+            # Tags from mediapart first, then the tags added in periplus.
+            tags = self._mediapart_tags(reader, document_id)
+            tags.extend({'title': tag.value}
+                        for tag in thedocument.tag_set.all())
+            annotations = [
+                self._annotation_to_json(annotation, annotation.tag_set.all(),
+                                         log_tags=True)
+                for annotation in thedocument.annotationdocument_set.all()
+                # Take only the public annotations
+                if annotation.visibility == 1
+            ]
+            response['documents'].append({
+                'id': document_id,
+                'title': str(thedocument.title),
+                'abstract': str(thedocument.description),
+                'url_document': reader.get_url(document_id),
+                'url_image': thedocument.image.url,
+                'date': datetime.fromtimestamp(timestamp).strftime('%d-%m-%Y'),
+                'category': reader.get_category(document_id),
+                'weight': weight,
+                'tags': tags,
+                'author': [self._author_to_json(self.get_author(document_id))],
+                'annotations': annotations,
+            })
+        response['total_count'] = len(response['documents'])
+        return None
+
+    def _add_cached_cluster(self, response, query_id, cluster_id, offset,
+                            count):
+        '''Add to response one page of the documents of a cached cluster.
+
+        Returns None on success and a JSON error string otherwise.
+        '''
+        context = self.get_contextual_data(query_id)
+        if not context['weblab_data']:
+            # A valid JSON object, like the other error messages.
+            return '{"error msg": "invalid query id"}'
+        list_concepts, documents_by_concept = context['weblab_data']
+        if not list_concepts:
+            return '{"error msg": "no data for the query id"}'
+        # A negative id would silently index the list from its end.
+        if cluster_id < 0 or cluster_id >= len(list_concepts):
+            return '{"error msg": "invalid cluster id"}'
+
+        filtering = context['filtering_params']
+        categories = filtering['categories']
+        from_date = filtering['from_date']
+        to_date = filtering['to_date']
+        # An empty string is replaced by the widest bound on that side.
+        from_date = int(0 if from_date == '' else from_date)
+        to_date = int(9999999999 if to_date == '' else to_date)
+
+        response['cluster_title'] = list_concepts[cluster_id]['title']
+        # NOTE(review): setlocale changes the locale of the whole process.
+        # It selects the language of the month name (%B) used below, so it
+        # is called once here rather than for every document.
+        locale.setlocale(locale.LC_ALL, '')
+        reader = MediapartReader()
+        annotations = None
+        matching = 0
+        for document in documents_by_concept[cluster_id]:
+            # Filtering by category (an empty list accepts every category)
+            if categories != [] and document['category'] not in categories:
+                continue
+            # Filtering by date
+            timestamp = int(document['date'])
+            if not from_date <= timestamp < to_date:
+                continue
+            position = matching
+            matching += 1
+            # Filtering by offset
+            if not offset <= position < offset + count:
+                continue
+            if annotations is None:
+                annotations = self._public_annotations()
+            author = self.get_author(document['id'])
+            response['documents'].append({
+                'id': document['id'],
+                'title': document['title'],
+                'abstract': document['abstract'],
+                'url_document': document['url'],
+                'url_image': document['image_path'],
+                'date': datetime.fromtimestamp(timestamp).strftime('%d %B %Y'),
+                'category': document['category'],
+                'weight': float(document['weight']),
+                'tags': self._mediapart_tags(reader, str(document['id'])),
+                'author': [self._author_to_json(author)],
+                'annotations': list(annotations),
+            })
+        response['total_count'] = matching
+        return None
+
+    def _public_annotations(self):
+        '''Return the JSON form of all the public annotations.'''
+        # NOTE(review): the annotations are not filtered by document, so every
+        # document of a cached cluster gets the same list - confirm that this
+        # is intended.
+        return [
+            self._annotation_to_json(
+                annotation,
+                Tag.objects.filter(annotationdocument_id=annotation.id))
+            for annotation in Annotationdocument.objects.all()
+            # Take only the public annotations
+            if annotation.visibility == 1
+        ]
+
+    def _annotation_to_json(self, annotation, tags, log_tags=False):
+        '''Return the JSON form of an annotation and of its tags.'''
+        jsonannotation = {'id': annotation.id,
+                          'user': annotation.user.username}
+        # Test the scope of the annotation (a part of an article or the
+        # global article)
+        if annotation.annoted_text:
+            jsonannotation['annotated_text'] = annotation.annoted_text
+        jsonannotation['text'] = annotation.description
+        jsonannotation['tags'] = []
+        for tag in tags:
+            if log_tags:
+                logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == %s', tag.value)
+            # Both keys carry the tag value: 'id' as it is stored, 'title'
+            # as a string.
+            # TODO: URL?
+            jsonannotation['tags'].append({'id': tag.value,
+                                           'title': str(tag.value)})
+        return jsonannotation
+
+    def _mediapart_tags(self, reader, document_id):
+        '''Return the mediapart tags of a document as a list of JSON objects.'''
+        # The first item of each tag is decoded as windows-1252 and re-encoded
+        # as UTF-8.
+        return [{'title': tag[0].decode("windows-1252").encode("utf8")}
+                for tag in reader.get_tags(document_id)]
+
+    def _author_to_json(self, author):
+        '''Return the JSON form of an author, with the biography URL.'''
+        return {'id': author['id'],
+                'name': author['name'],
+                'url': self.AUTHOR_URL_PREFIX + str(author['id'])}
+
+    def get_author(self, document_id):
+        '''Return what MediapartReader.get_author gives for the document.'''
+        return MediapartReader().get_author(document_id)
+
+    def get_contextual_data(self, query_id):
+        '''Return the cached data of a query.
+
+        The result is a dict with the keys 'weblab_data' and
+        'filtering_params'. When the cache has no entry for query_id,
+        'weblab_data' is None and the dates are 0 with no category.
+        '''
+        query_context = cache.get(query_id)
+        if not query_context:
+            # query_id may be an int, so it is formatted rather than
+            # concatenated (str + int raises TypeError).
+            logger.info("Error: Invalid query id:%s", query_id)
+            return {
+                'weblab_data': None,
+                'filtering_params': {'from_date': 0, 'to_date': 0,
+                                     'categories': []},
+            }
+        return {
+            'weblab_data': cache.get(query_context['weblab_data_key']),
+            'filtering_params': query_context['filtering_params'],
+        }
\ No newline at end of file