# Source: alcatel/controller/Documents.py (changeset 8ca7f2cea729).
'''
Created on 7 aout 2012

@author: gerard
'''
import logging
import simplejson
import locale
from datetime import datetime

from django.core.cache import cache
from document.models import Annotationdocument
from document.models import Tag, Cluster
from mediapartdb.MediapartReader import MediapartReader
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes

logger = logging.getLogger('document')

# Base URL of an author's biography page; the author id is appended to it.
_AUTHOR_URL_PREFIX = 'http://www.mediapart.fr/biographie/'
# Only annotations whose ``visibility`` equals this value are exported.
_PUBLIC_VISIBILITY = 1
# Upper bound used when the filtering parameters carry an empty ``to_date``.
_MAX_TIMESTAMP = 9999999999


# List of documents of a cluster with annotations
class Documents(object):
    '''Build the JSON listing of the documents of a cluster, with annotations.'''

    def __init__(self, request):
        '''Keep the request; its session is read and written by get_documents.'''
        self.request = request

    def get_documents(self, query, cluster, offset, count):
        '''Return a JSON string describing the documents of one cluster.

        When ``query`` equals 0 the query id, cluster id, offset and count are
        read from the request through ``ClientDocumentsGetAttributes`` and the
        ``cluster``/``offset``/``count`` arguments are ignored; otherwise the
        four arguments are converted with ``int`` and used directly.

        The resolved values are stored in the session under the keys
        ``query``, ``cluster``, ``offset`` and ``count``.  The session entry
        ``json`` must already hold a JSON document with a ``query`` object
        (``text``, ``categories``, ``from_date``, ``to_date``); a missing
        entry raises ``KeyError``.

        A falsy query id means the cluster is loaded from the database (every
        document of the cluster is returned, ``offset``/``count`` are not
        applied).  Otherwise the cluster is taken from the cache and filtered
        by category, date and the ``offset``/``count`` window.

        Returns the serialized result, or a short JSON error string when an
        input is missing or does not match any data.
        '''
        json = {}

        if query == 0:
            attr = ClientDocumentsGetAttributes(self.request)

            if not attr.get_cluster():
                return '{"error msg": "no cluster_id defined"}'
            if attr.get_offset() == '':
                return '{"error msg": "no offset defined"}'
            if attr.get_count() == '':
                return '{"error msg": "no count defined"}'

            query_id = int(attr.get_query_id())
            cluster_id = int(attr.get_cluster())
            offset = int(attr.get_offset())
            count = int(attr.get_count())
        else:
            query_id = int(query)
            cluster_id = int(cluster)
            offset = int(offset)
            count = int(count)

        json['cluster_id'] = cluster_id
        json['offset'] = offset

        session = self.request.session
        session['query'] = query_id
        session['cluster'] = cluster_id
        session['offset'] = offset
        session['count'] = count

        stored = simplejson.loads(session['json'])
        logger.debug('session json: %s', stored)
        stored_query = stored['query']
        json['query'] = {
            'text': stored_query['text'],
            'categories': stored_query['categories'],
            'from_date': stored_query['from_date'],
            'to_date': stored_query['to_date'],
        }

        json['documents'] = []
        if not query_id:
            # No query id: the cluster was saved in the database.
            try:
                saved_cluster = Cluster.objects.get(pk=cluster_id)
            except Cluster.DoesNotExist:
                error = '{"error": "Invalid cluster id"}'
                logger.info(error)
                return error
            json['cluster_title'] = saved_cluster.title
            json['documents'] = self._saved_cluster_documents(saved_cluster)
            total_count = len(json['documents'])
        else:
            # A query id is present: the cluster lives in the cache.
            logger.info('query_id present')
            context = self.get_contextual_data(query_id)
            if not context['weblab_data']:
                # Was '{"Error: Invalid query id"}', which is not valid JSON.
                return '{"error msg": "Invalid query id"}'
            list_concepts, detailed_documents = context['weblab_data']
            if not list_concepts:
                return '{"error msg": "no data for the query id"}'
            # A negative id would silently index from the end of the list.
            if cluster_id < 0 or cluster_id >= len(list_concepts):
                return '{"error msg": "invalid cluster id"}'
            json['cluster_title'] = list_concepts[cluster_id]['title']
            json['documents'], total_count = self._cached_cluster_documents(
                detailed_documents[cluster_id],
                context['filtering_params'],
                offset,
                count,
            )

        json['total_count'] = total_count
        return simplejson.dumps(json)

    def _saved_cluster_documents(self, saved_cluster):
        '''Return the JSON entries for every document of a database cluster.'''
        reader = MediapartReader()
        articles = []
        for thedocument in saved_cluster.document.all():
            document_id = str(thedocument.documentId)
            timestamp = int(reader.get_date(document_id))
            weight_row = saved_cluster.clusterdocumentweight_set.get(
                document=thedocument)

            # Tags known to mediapart first, then the tags stored locally.
            tags = self._mediapart_tags(reader, document_id)
            for tag in thedocument.tag_set.all():
                tags.append({'title': tag.value})

            annotations = []
            for annotation in thedocument.annotationdocument_set.all():
                # Take only the public annotations
                if annotation.visibility != _PUBLIC_VISIBILITY:
                    continue
                annotation_tags = list(annotation.tag_set.all())
                for annotation_tag in annotation_tags:
                    logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == %s',
                                str(annotation_tag.value))
                annotations.append(
                    self._annotation_to_json(annotation, annotation_tags))

            articles.append({
                'id': document_id,
                'title': str(thedocument.title),
                'abstract': str(thedocument.description),
                'url_document': reader.get_url(document_id),
                # TODO
                'url_image': thedocument.image.url,
                'date': datetime.fromtimestamp(timestamp).strftime('%d-%m-%Y'),
                'category': reader.get_category(document_id),
                'weight': weight_row.weight,
                'tags': tags,
                'author': self._author_entries(document_id),
                'annotations': annotations,
            })
        return articles

    def _cached_cluster_documents(self, documents, filtering, offset, count):
        '''Filter and paginate the cached documents of one cluster.

        Returns ``(articles, total)`` where ``total`` counts every document
        that passes the category and date filters and ``articles`` holds only
        those whose zero-based rank falls in ``[offset, offset + count)``.
        '''
        categories = filtering['categories']
        from_date = filtering['from_date']
        to_date = filtering['to_date']
        logger.debug('get_documents filtering: categories=%s from_date=%s '
                     'to_date=%s', categories, from_date, to_date)
        lower_bound = 0 if from_date == '' else int(from_date)
        upper_bound = _MAX_TIMESTAMP if to_date == '' else int(to_date)

        # NOTE: setlocale changes process-wide state; it is needed so that
        # '%B' renders the month name in the environment's locale.
        locale.setlocale(locale.LC_ALL, '')
        reader = MediapartReader()
        # Loaded lazily, and only once, instead of once per document.
        public_annotations = None
        articles = []
        total = 0
        for document in documents:
            # Filtering by category (an empty list means "no filter")
            if categories and document['category'] not in categories:
                continue
            # Filtering by date
            timestamp = int(document['date'])
            if not lower_bound <= timestamp < upper_bound:
                continue
            rank = total
            total += 1
            # Filtering by offset
            if not offset <= rank < offset + count:
                continue

            logger.debug('document id: %s', document['id'])
            if public_annotations is None:
                public_annotations = self._all_public_annotations()
            articles.append({
                'id': document['id'],
                'title': document['title'],
                'abstract': document['abstract'],
                'url_document': document['url'],
                # TODO
                'url_image': document['image_path'],
                'date': datetime.fromtimestamp(timestamp).strftime('%d %B %Y'),
                'category': document['category'],
                'weight': float(document['weight']),
                'tags': self._mediapart_tags(reader, str(document['id'])),
                'author': self._author_entries(document['id']),
                'annotations': list(public_annotations),
            })
        return articles, total

    def _all_public_annotations(self):
        '''Return the JSON entries of every public annotation in the database.

        NOTE(review): the annotations are not restricted to a document, so
        each cached article receives the same list, exactly as before this
        refactoring -- confirm whether a per-document filter was intended.
        '''
        entries = []
        for annotation in Annotationdocument.objects.all():
            # Take only the public annotations
            if annotation.visibility == _PUBLIC_VISIBILITY:
                tags = Tag.objects.filter(annotationdocument_id=annotation.id)
                entries.append(self._annotation_to_json(annotation, tags))
        return entries

    @staticmethod
    def _annotation_to_json(annotation, tags):
        '''Convert one annotation and its tags to a JSON-ready dict.'''
        entry = {'id': annotation.id, 'user': annotation.user.username}
        # Test the scope of the annotation (a part of an article or the
        # global article): 'annotated_text' is only set for a part.
        if annotation.annoted_text:
            entry['annotated_text'] = annotation.annoted_text
        entry['text'] = annotation.description
        # Only the title is exported; the former {'id': ...} dict was
        # overwritten before use.  TO DO URL ?
        entry['tags'] = [{'title': str(tag.value)} for tag in tags]
        return entry

    @staticmethod
    def _mediapart_tags(reader, document_id):
        '''Return the reader's tags for a document as ``{'title': ...}`` dicts.

        The first item of each tag row is decoded from windows-1252 and
        re-encoded as UTF-8.
        '''
        return [
            {'title': tag[0].decode("windows-1252").encode("utf8")}
            for tag in reader.get_tags(document_id)
        ]

    def _author_entries(self, document_id):
        '''Return the single-item author list of a document.'''
        author = self.get_author(document_id)
        return [{
            'id': author['id'],
            'name': author['name'],
            'url': _AUTHOR_URL_PREFIX + str(author['id']),
        }]

    def get_author(self, document_id):
        '''Return what ``MediapartReader.get_author`` yields for the document.'''
        reader = MediapartReader()
        return reader.get_author(document_id)

    def get_contextual_data(self, query_id):
        '''Fetch the cached data attached to ``query_id``.

        Returns a dict with two keys: ``weblab_data`` (the cache entry found
        under the context's ``weblab_data_key``, or ``None`` when the query id
        is not in the cache) and ``filtering_params`` (taken from the cached
        context, or neutral defaults when the query id is not in the cache).
        '''
        query_context = cache.get(query_id)
        if not query_context:
            # %s formatting: query_id is usually an int, and concatenating
            # it to a str raised TypeError here.
            logger.info("Error: Invalid query id:%s", query_id)
            return {
                'weblab_data': None,
                'filtering_params': {
                    'from_date': 0,
                    'to_date': 0,
                    'categories': [],
                },
            }

        weblab_data = cache.get(query_context['weblab_data_key'])
        return {
            'weblab_data': weblab_data,
            'filtering_params': query_context['filtering_params'],
        }