alcatel/controller/Documents.py
author cobled@FRVILN0H401086.emea.lucent.com
Thu, 24 Jan 2013 16:58:55 +0100
changeset 27 8ca7f2cea729
child 37 3848e1813a30
permissions -rw-r--r--
add alcatel folder

'''
Created on 7 August 2012

@author: gerard
'''
import logging
import simplejson
import locale
from datetime import datetime 

from django.core.cache import cache
from document.models import Annotationdocument
from document.models import Tag, Cluster
from mediapartdb.MediapartReader import MediapartReader
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes

logger = logging.getLogger('document')

# List of documents of a cluster with annotations
class Documents(object):
    '''Build the JSON listing of the documents of one cluster.

    Two sources are supported by get_documents:
      * a cluster stored in the database (falsy query id), read through the
        Cluster model;
      * a cluster computed for a query and stored in the Django cache
        (non-zero query id), read through get_contextual_data.

    Every public method returns plain data; get_documents always returns a
    JSON string, either the listing or a small error object.
    '''

    # Only annotations carrying this visibility value are exposed.
    _PUBLIC_VISIBILITY = 1
    # Stand-in upper bound used when the cached filter has an empty to_date.
    _OPEN_END_DATE = 9999999999
    # Prefix of the author biography URL; the author id is appended to it.
    _AUTHOR_URL = 'http://www.mediapart.fr/biographie/'

    def __init__(self, request):
        '''Keep the request; get_documents reads and writes its session.'''
        self.request = request

    def get_documents(self, query, cluster, offset, count):
        '''Return the documents of a cluster as a JSON string.

        query   -- query id; when it equals 0 the query id, cluster, offset
                   and count are read from the request through
                   ClientDocumentsGetAttributes and the other arguments are
                   ignored.
        cluster -- cluster id (primary key in the database branch, index in
                   the cached concept list in the cache branch).
        offset  -- index of the first matching document to return.
        count   -- maximum number of documents to return.

        The resolved values are stored in the session under 'query',
        'cluster', 'offset' and 'count'. The session entry 'json' must hold
        the serialized treemap whose 'query' object is echoed in the result.

        Returns the JSON listing, or a JSON error object when a request
        attribute is missing or the cluster / query id is unknown.

        NOTE(review): the database branch ignores offset and count and
        returns every document of the cluster -- confirm this is intended.
        '''
        if query == 0:
            attr = ClientDocumentsGetAttributes(self.request)
            if not attr.get_cluster():
                return '{"error msg": "no cluster_id defined"}'
            if attr.get_offset() == '':
                return '{"error msg": "no offset defined"}'
            if attr.get_count() == '':
                return '{"error msg": "no count defined"}'
            query_id = int(attr.get_query_id())
            cluster_id = int(attr.get_cluster())
            offset = int(attr.get_offset())
            count = int(attr.get_count())
        else:
            query_id = int(query)
            cluster_id = int(cluster)
            offset = int(offset)
            count = int(count)

        session = self.request.session
        session['query'] = query_id
        session['cluster'] = cluster_id
        session['offset'] = offset
        session['count'] = count

        treemap = simplejson.loads(session['json'])
        logger.debug('treemap stored in session: %s', treemap)
        submitted = treemap['query']
        payload = {
            'cluster_id': cluster_id,
            'offset': offset,
            'query': {
                'text': submitted['text'],
                'categories': submitted['categories'],
                'from_date': submitted['from_date'],
                'to_date': submitted['to_date'],
            },
            'documents': [],
        }

        # A falsy query id means the cluster was saved in the database;
        # otherwise it lives in the cache under that query id.
        if not query_id:
            error = self._fill_from_database(payload, cluster_id)
        else:
            error = self._fill_from_cache(payload, query_id, cluster_id,
                                          offset, count)
        if error is not None:
            return error
        return simplejson.dumps(payload)

    def _fill_from_database(self, payload, cluster_id):
        '''Fill payload from a Cluster row; return an error JSON or None.'''
        try:
            cluster = Cluster.objects.get(pk=cluster_id)
        except Cluster.DoesNotExist:
            error = '{"error": "Invalid cluster id"}'
            logger.info(error)
            return error

        payload['cluster_title'] = cluster.title
        reader = MediapartReader()
        total = 0
        for document in cluster.document.all():
            total += 1
            document_id = str(document.documentId)
            article = {
                'id': document_id,
                # Text fields are passed through unchanged: wrapping them in
                # str() raises UnicodeEncodeError on accented text under
                # Python 2, and the serializer handles unicode itself.
                'title': document.title,
                'abstract': document.description,
                'url_document': reader.get_url(document_id),
                # TODO
                'url_image': document.image.url,
                'date': datetime.fromtimestamp(
                    int(reader.get_date(document_id))).strftime('%d-%m-%Y'),
                'category': reader.get_category(document_id),
                'weight': cluster.clusterdocumentweight_set.get(
                    document=document).weight,
            }

            # Tags coming from mediapart first, then the ones stored locally.
            tags = self._mediapart_tags(reader, document_id)
            tags.extend({'title': tag.value}
                        for tag in document.tag_set.all())
            article['tags'] = tags
            article['author'] = self._author_entries(document_id)

            annotations = []
            for annotation in document.annotationdocument_set.all():
                # Take only the public annotations
                if annotation.visibility != self._PUBLIC_VISIBILITY:
                    continue
                annotation_tags = list(annotation.tag_set.all())
                for tag in annotation_tags:
                    logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == %s',
                                tag.value)
                annotations.append(
                    self._annotation_entry(annotation, annotation_tags))
            article['annotations'] = annotations

            payload['documents'].append(article)

        payload['total_count'] = total
        return None

    def _fill_from_cache(self, payload, query_id, cluster_id, offset, count):
        '''Fill payload from cached query data; return an error JSON or None.

        Documents are filtered by category and date, counted, and only the
        window [offset, offset + count) of the matching ones is rendered.
        '''
        logger.info('query_id present')
        context = self.get_contextual_data(query_id)
        if not context['weblab_data']:
            # Same error shape as the invalid cluster id case, so that the
            # answer is parseable JSON.
            return '{"error": "Invalid query id"}'

        concepts, documents_by_concept = context['weblab_data']
        if not concepts:
            return '{"error msg": "no data for the query id"}'
        # A negative id would silently index from the end of the list.
        if not 0 <= cluster_id < len(concepts):
            return '{"error msg": "invalid cluster id"}'

        filtering = context['filtering_params']
        categories = filtering['categories']
        from_date = filtering['from_date']
        if from_date == '':
            from_date = 0
        to_date = filtering['to_date']
        if to_date == '':
            to_date = self._OPEN_END_DATE
        logger.debug('get_documents filtering: categories=%s from_date=%s '
                     'to_date=%s', categories, from_date, to_date)
        lower = int(from_date)
        upper = int(to_date)

        payload['cluster_title'] = concepts[cluster_id]['title']

        # An empty category list means "no category filter".
        matching = [
            document for document in documents_by_concept[cluster_id]
            if (not categories or document['category'] in categories)
            and lower <= int(document['date']) < upper
        ]
        payload['total_count'] = len(matching)

        # Clamp the window so that negative values select the same documents
        # as an index comparison would, instead of slicing from the end.
        first = max(offset, 0)
        last = max(offset + count, first)
        page = matching[first:last]
        if not page:
            return None

        # Done once per call instead of once per document: the locale is
        # process-wide state and drives the month name produced by '%B'.
        locale.setlocale(locale.LC_ALL, '')
        reader = MediapartReader()
        # NOTE(review): annotations are not restricted to the current
        # document, so every article carries every public annotation. This
        # matches the previous output; the list is now built once per call.
        public_annotations = self._public_annotations()

        for document in page:
            logger.debug('rendering document %s', document['id'])
            article = {
                'id': document['id'],
                'title': document['title'],
                'abstract': document['abstract'],
                'url_document': document['url'],
                # TODO
                'url_image': document['image_path'],
                'date': datetime.fromtimestamp(
                    int(document['date'])).strftime('%d %B %Y'),
                'category': document['category'],
                'weight': float(document['weight']),
                'tags': self._mediapart_tags(reader, str(document['id'])),
                'author': self._author_entries(document['id']),
                'annotations': list(public_annotations),
            }
            payload['documents'].append(article)
        return None

    def _author_entries(self, document_id):
        '''Return the one-element author list of a document.'''
        author = self.get_author(document_id)
        return [{
            'id': author['id'],
            'name': author['name'],
            'url': self._AUTHOR_URL + str(author['id']),
        }]

    def _public_annotations(self):
        '''Return the JSON-ready entries of all public annotations.'''
        return [
            self._annotation_entry(
                annotation,
                Tag.objects.filter(annotationdocument_id=annotation.id))
            for annotation in Annotationdocument.objects.all()
            if annotation.visibility == self._PUBLIC_VISIBILITY
        ]

    @staticmethod
    def _mediapart_tags(reader, document_id):
        '''Return the mediapart tags of a document as {'title': ...} dicts.'''
        # Rows are re-encoded from windows-1252 to UTF-8 (presumably the
        # encoding of the mediapart database -- confirm).
        return [
            {'title': row[0].decode("windows-1252").encode("utf8")}
            for row in reader.get_tags(document_id)
        ]

    @staticmethod
    def _annotation_entry(annotation, tags):
        '''Return the JSON-ready dict of one annotation and its tags.'''
        entry = {'id': annotation.id, 'user': annotation.user.username}
        # 'annotated_text' is only present when the annotation targets a
        # part of the article rather than the whole article.
        if annotation.annoted_text:
            entry['annotated_text'] = annotation.annoted_text
        entry['text'] = annotation.description
        # TO DO URL ?
        entry['tags'] = [{'title': tag.value} for tag in tags]
        return entry

    def get_author(self, document_id):
        '''Return the author of a document as given by MediapartReader.'''
        return MediapartReader().get_author(document_id)

    def get_contextual_data(self, query_id):
        '''Return the cached data attached to a query id.

        The result is a dict with two keys:
          'weblab_data'      -- the value cached under the query context's
                                'weblab_data_key', or None when the query id
                                is not in the cache;
          'filtering_params' -- the filter stored with the query, or a
                                neutral filter when the query id is unknown.
        '''
        query_context = cache.get(query_id)
        if not query_context:
            # %-style arguments: query_id is an integer here, so building
            # the message with '+' raised TypeError instead of logging.
            logger.info("Error: Invalid query id:%s", query_id)
            return {
                'weblab_data': None,
                'filtering_params': {
                    'from_date': 0, 'to_date': 0, 'categories': []},
            }
        return {
            'weblab_data': cache.get(query_context['weblab_data_key']),
            'filtering_params': query_context['filtering_params'],
        }