# alcatel/controller/Documents.py
# author: cobled
# Wed, 14 Aug 2013 16:36:41 +0200
# changeset 37 3848e1813a30
# parent 27 8ca7f2cea729
# permissions -rw-r--r--
# last version

'''
Created on 7 aout 2012

@author: gerard
'''
import logging
import simplejson
import locale
from datetime import datetime 
import time

from django.core.cache import cache
from document.models import Annotationdocument
from document.models import Tag, Cluster
from mediapartdb.MediapartReader import MediapartReader
from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
from document.models import Documentaryfile
logger = logging.getLogger('document')

# List of documents of a cluster with annotations
class Documents(object):
    """Builds the JSON listing of the documents of a cluster.

    A cluster lives either in the database, inside a ``Documentaryfile``
    (selected when ``query`` == 0), or in the cache under a query id.
    ``get_documents`` returns a JSON *string* with the query description,
    the cluster title and one entry per document (tags, author, public
    annotations), paginated by ``offset``/``count``.
    """

    def __init__(self, request):
        # Kept for session access ('jsonTreemap') in get_documents().
        self.request = request

    def get_documents(self, query, cluster, offset, count, docId):
        """Return the JSON string describing one cluster's documents.

        :param query:   query id; 0 means "read the cluster from the DB".
        :param cluster: index of the cluster inside the documentary file
                        (DB case) or inside the cached concept list.
        :param offset:  index of the first document to return.
        :param count:   maximum number of documents to return.
        :param docId:   ``Documentaryfile`` primary key (DB case only).
        :return: a JSON string; on failure a literal error JSON string.
        """
        logger.info('get_documents query' + str(query))
        logger.info('get_documents cluster' + str(cluster))
        logger.info('get_documents offset' + str(offset))
        logger.info('get_documents docId' + str(docId))
        logger.info('get_documents count' + str(count))

        json = {}
        query_id = int(query)
        cluster_id = int(cluster)
        offset = int(offset)
        count = int(count)

        # NOTE(review): raises KeyError if 'jsonTreemap' is absent from the
        # session -- presumably always set by the treemap view; confirm.
        logger.info(self.request.session['jsonTreemap'])
        json['documents'] = []
        article_index = 0

        if query_id == 0:
            # The cluster is saved in the database (docId selects it).
            try:
                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
            except Documentaryfile.DoesNotExist:
                # BUGFIX: the original logged the error but fell through,
                # leaving `documentaryfile` unbound (NameError below).
                json = '{"Error": "Invalid documentary id"}'
                logger.info(json)
                return json

            my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
            json['query'] = {
                'text': my_jsontreemap['query']['text'],
                'categories': my_jsontreemap['query']['categories'],
                'from_date': my_jsontreemap['query']['from_date'],
                'to_date': my_jsontreemap['query']['to_date'],
            }
            thecluster = (documentaryfile.cluster_set.all())[cluster_id]
            json['cluster_title'] = thecluster.title
            reader = MediapartReader()
            for thedocument in thecluster.document.all():
                article_index += 1
                # Pagination: keep only documents [offset, offset + count).
                if not (offset <= article_index - 1 < offset + count):
                    continue
                doc_key = str(thedocument.documentId)
                jsonarticle = {
                    'id': thedocument.documentId,
                    'title': thedocument.title,
                    'abstract': thedocument.description,
                    'url_document': reader.get_url(doc_key),
                    # TODO: real image URL handling
                    'url_image': thedocument.image.url,
                    'date': datetime.fromtimestamp(
                        int(reader.get_date(doc_key))).strftime('%d-%m-%Y'),
                    'category': reader.get_category(doc_key),
                }
                clusterDoc = thecluster.clusterdocumentweight_set.get(
                    document=thedocument)
                jsonarticle['weight'] = clusterDoc.weight
                jsonarticle['tags'] = []
                # Tags coming from Mediapart (windows-1252 in the source DB).
                for tag in reader.get_tags(doc_key):
                    jsonarticle['tags'].append(
                        {'title': tag[0].decode("windows-1252").encode("utf8")})
                # Tags created inside Periplus.
                for tag in thedocument.tag_set.all():
                    jsonarticle['tags'].append({'title': tag.value})
                author = self.get_author(doc_key)
                jsonarticle['author'] = [{
                    'id': author['id'],
                    'name': author['name'],
                    'url': 'http://www.mediapart.fr/biographie/' + str(author['id']),
                }]
                jsonarticle['annotations'] = []
                for theannotationdoc in thedocument.annotationdocument_set.all():
                    # Export only the public annotations.
                    if theannotationdoc.visibility != 1:
                        continue
                    jsonannotation = {
                        'id': theannotationdoc.id,
                        'user': theannotationdoc.user.username,
                        'text': theannotationdoc.description,
                    }
                    # Scope: a text fragment or the whole article.
                    if theannotationdoc.annoted_text:
                        jsonannotation['annotated_text'] = theannotationdoc.annoted_text
                    # TODO: URL for tags?
                    jsonannotation['tags'] = [
                        {'title': str(t.value)}
                        for t in theannotationdoc.tag_set.all()]
                    jsonarticle['annotations'].append(jsonannotation)
                json['documents'].append(jsonarticle)

        else:
            # The cluster is saved in the cache under query_id.
            logger.info('query_id present' + str(query_id))
            d = simplejson.loads(self.request.session['jsonTreemap'])
            json['query'] = {
                'text': d['query']['text'],
                'categories': d['query']['categories'],
                'from_date': d['query']['from_date'],
                'to_date': d['query']['to_date'],
            }
            dico = self.get_contextual_data(query_id)
            if not dico['weblab_data']:
                json = '{"Error: Invalid query id"}'
                return json
            list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
            filtering = dico['filtering_params']
            if not list_concepts:
                return '{"error msg": "no data for the query id"}'
            if cluster_id >= len(list_concepts):
                return '{"error msg": "invalid cluster id"}'
            categories = filtering['categories']
            # Filter dates arrive as 'mm/dd/yyyy'; compare as unix timestamps.
            # (The original's `== ''` fallbacks were unreachable dead code.)
            from_date = int(time.mktime(
                time.strptime(filtering['from_date'], '%m/%d/%Y')))
            to_date = int(time.mktime(
                time.strptime(filtering['to_date'], '%m/%d/%Y')))
            json['cluster_title'] = list_concepts[cluster_id]['title']
            # Loop-invariant work hoisted out of the document loop.
            reader = MediapartReader()
            locale.setlocale(locale.LC_ALL, '')  # localized month names
            for document in concepts_with_detailed_documents_list[cluster_id]:
                # Filter by category (empty category list == no filtering).
                if categories and document['category'] not in categories:
                    continue
                # Filter by date window [from_date, to_date).
                if not (from_date <= int(document['date']) < to_date):
                    continue
                article_index += 1
                # Pagination: keep only documents [offset, offset + count).
                if not (offset <= article_index - 1 < offset + count):
                    continue
                jsonarticle = {
                    'id': document['id'],
                    'title': document['title'],
                    'abstract': document['abstract'],
                    'url_document': document['url'],
                    # TODO: real image URL handling
                    'url_image': document['image_path'],
                    'date': (datetime.fromtimestamp(int(document['date']))
                             .strftime('%d %B %Y')
                             .decode("windows-1252").encode("utf8")),
                    'category': document['category'],
                    'weight': float(document['weight']),
                }
                jsonarticle['tags'] = []
                # Tags coming from Mediapart (windows-1252 in the source DB).
                for tag in reader.get_tags(str(document['id'])):
                    jsonarticle['tags'].append(
                        {'title': tag[0].decode("windows-1252").encode("utf8")})
                author = self.get_author(document['id'])
                jsonarticle['author'] = [{
                    'id': author['id'],
                    'name': author['name'],
                    'url': 'http://www.mediapart.fr/biographie/' + str(author['id']),
                }]
                jsonarticle['annotations'] = []
                # NOTE(review): this scans EVERY Annotationdocument row and
                # attaches all public annotations to every article -- looks
                # like it should be filtered by document; confirm the model
                # relation before changing.
                for annotation in Annotationdocument.objects.all():
                    if annotation.visibility != 1:
                        continue
                    jsonannotation = {
                        'id': annotation.id,
                        'user': annotation.user.username,
                        'text': annotation.description,
                    }
                    # Scope: a text fragment or the whole article.
                    if annotation.annoted_text:
                        jsonannotation['annotated_text'] = annotation.annoted_text
                    # TODO: URL for tags?
                    jsonannotation['tags'] = [
                        {'title': str(tag.value)}
                        for tag in Tag.objects.filter(
                            annotationdocument_id=annotation.id)]
                    jsonarticle['annotations'].append(jsonannotation)
                json['documents'].append(jsonarticle)

        json['total_count'] = article_index
        result = simplejson.dumps(json)
        logger.info(result)
        return result

    def get_author(self, document_id):
        """Return the author dict ('id', 'name') for a Mediapart document."""
        reader = MediapartReader()
        return reader.get_author(document_id)

    def get_contextual_data(self, query_id):
        """Fetch the cached weblab data and filtering params for a query.

        :return: ``{'weblab_data': ..., 'filtering_params': ...}`` where
                 ``weblab_data`` is ``None`` when the query id is unknown.
        """
        query_context = cache.get(query_id)
        # BUGFIX: the original dereferenced query_context['filtering_params']
        # BEFORE the None check (TypeError on cache miss), and its fallback
        # dict had no 'weblab_data_key', so the fallback path raised KeyError.
        if not query_context:
            logger.info("Error: Invalid query id:" + str(query_id))
            weblab_data = None
            filtering_params = {'from_date': 0, 'to_date': 0, 'categories': []}
        else:
            logger.info('query_id ********** =' + str(query_context['filtering_params']))
            weblab_data = cache.get(query_context['weblab_data_key'])
            filtering_params = query_context['filtering_params']
        return {'weblab_data': weblab_data, 'filtering_params': filtering_params}