alcatel/controller/Documents.py
changeset 27 8ca7f2cea729
child 37 3848e1813a30
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/alcatel/controller/Documents.py	Thu Jan 24 16:58:55 2013 +0100
@@ -0,0 +1,253 @@
+'''
+Created on 7 aout 2012
+
+@author: gerard
+'''
+import logging
+import simplejson
+import locale
+from datetime import datetime 
+
+from django.core.cache import cache
+from document.models import Annotationdocument
+from document.models import Tag, Cluster
+from mediapartdb.MediapartReader import MediapartReader
+from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
+
+logger = logging.getLogger('document')  # logger named 'document', used by the Documents class below
+
+# List of documents of a cluster with annotations
+class Documents(object):
+
+    def __init__(self, request):
+        self.request = request
+    
+    def get_documents(self,query,cluster,offset,count):
+        json = {}
+        
+        if query == 0:
+            attr = ClientDocumentsGetAttributes(self.request)
+       
+            if not attr.get_cluster():
+                json = '{"error msg": "no cluster_id defined"}'
+                return json
+            
+            if attr.get_offset() == '':
+                json = '{"error msg": "no offset defined"}'
+                return json
+            
+            if attr.get_count() == '':
+                json = '{"error msg": "no count defined"}'
+                return json
+            
+           
+            json['cluster_id'] = int(attr.get_cluster())
+            json['offset'] = int(attr.get_offset())
+            
+            query_id = int(attr.get_query_id())
+            cluster_id = int(attr.get_cluster())
+            offset = int(attr.get_offset())
+            count=int(attr.get_count())
+        else:
+            json['cluster_id'] = int(cluster)
+            json['offset'] = int(offset)
+            
+            query_id = int(query)
+            cluster_id = int(cluster)
+            offset = int(offset)
+            count=int(count)
+        
+        self.request.session['query'] = query_id
+        self.request.session['cluster'] = cluster_id
+        self.request.session['offset'] = offset
+        self.request.session['count'] = count
+         
+        '''print self.request.session['json']      
+        json_treemap = simplejson.loads(self.request.session['json'])      
+        print json_treemap.query'''
+        
+        d = simplejson.loads(self.request.session['json'])
+        print d     
+        jsonquery = {'text': d['query']['text']}
+        jsonquery['categories'] = d['query']['categories']
+        jsonquery['from_date'] = d['query']['from_date']
+        jsonquery['to_date'] = d['query']['to_date']
+        json['query'] = jsonquery
+           
+        json['documents'] = []
+        article_index=0
+        #if no query_id it is a cluster saved in database
+        if not query_id:
+            #json = '{"error msg": "query_id is not defined"}'
+            try:
+                cluster = Cluster.objects.get(pk=cluster_id)
+            except Cluster.DoesNotExist:
+                json = '{"error": "Invalid cluster id"}'
+                logger.info(json)
+                return json
+            json['cluster_title'] = cluster.title
+            reader = MediapartReader()
+            for thedocument in cluster.document.all():
+                article_index += 1
+                jsonarticle = {'id':str(thedocument.documentId)}
+                jsonarticle['title'] = str(thedocument.title)
+                jsonarticle['abstract'] = str(thedocument.description)
+                jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
+                # TODO
+                jsonarticle['url_image'] = thedocument.image.url
+                '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' 
+                
+                jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
+
+
+                jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
+                
+                clusterDoc = cluster.clusterdocumentweight_set.get(document=thedocument)
+                jsonarticle['weight'] = clusterDoc.weight
+                tags = reader.get_tags(str(thedocument.documentId))
+                jsonarticle['tags'] = []
+                #tags in mediapart
+                for tag in tags:
+                    jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
+                    jsonarticle['tags'].append(jsontag)
+
+                #tags in periplus
+                tags = thedocument.tag_set.all()
+                for tag in tags:
+                    jsontag = {'title':tag.value}
+                    jsonarticle['tags'].append(jsontag)
+                    
+                author = self.get_author(str(thedocument.documentId))
+                jsonarticle['author'] = []
+                jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+                jsonarticle['author'].append(jsonauthor)
+                
+                json['documents'].append(jsonarticle)
+                jsonarticle['annotations'] = []
+                
+                for theannotationdoc in thedocument.annotationdocument_set.all():
+                    #Take only the public annotations
+                    if theannotationdoc.visibility == 1:
+                        jsonannotation = {'id':theannotationdoc.id}
+                        jsonannotation['user'] = theannotationdoc.user.username
+                        # Test the scope of the annotation (a part of an article or the global article)
+                        if theannotationdoc.annoted_text:
+                            jsonannotation['annotated_text'] = theannotationdoc.annoted_text
+                        jsonannotation['text'] = theannotationdoc.description
+                 
+                        jsonannotation['tags'] = []
+                        for theannotationdoctag in theannotationdoc.tag_set.all():
+                            logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
+                            jsontag = {'id': theannotationdoctag.value}
+                            jsontag = {'title':str(theannotationdoctag.value)}
+                            #TO DO URL ?
+                            jsonannotation['tags'].append(jsontag)
+                                
+                        jsonarticle['annotations'].append(jsonannotation)
+                                 
+        #if query_id it is a cluster saved in cache
+        else:
+            logger.info('query_id present')
+            dico = self.get_contextual_data(query_id)
+            if dico['weblab_data']:
+                list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
+                filtering = dico['filtering_params']
+                if not list_concepts:
+                    json = '{"error msg": "no data for the query id"}'
+                    return json
+                if int(cluster_id) >= len(list_concepts):
+                    json = '{"error msg": "invalid cluster id"}'
+                    return json
+                categories = filtering['categories']
+                print 'get_documents !!!!'
+                print categories
+                from_date = filtering['from_date']
+                print 'from_date'
+                print from_date
+                if from_date == '':
+                    from_date = 0
+                to_date = filtering['to_date']
+                print 'to_date'
+                print to_date
+                if to_date == '':
+                    to_date = 9999999999
+                json['cluster_title'] = list_concepts[cluster_id]['title']
+                for document in concepts_with_detailed_documents_list[cluster_id]:
+                    #Filtering by category
+                    if (categories != [] and document['category'] in categories) or (categories == []):
+                        #Filtering by date
+                        if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
+                            article_index += 1
+                            #Filtering by offset
+                            if article_index - 1 >= offset and article_index - 1 < offset + count:
+                                jsonarticle = {'id':document['id']}
+                                jsonarticle['title'] = document['title']
+                                jsonarticle['abstract'] = document['abstract']
+                                jsonarticle['url_document'] = document['url']
+                                # TODO
+                                jsonarticle['url_image'] = document['image_path']
+                                #
+                                '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z''' 
+                                locale.setlocale(locale.LC_ALL,'') 
+                                jsonarticle['date'] =(datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')
+                                jsonarticle['category'] = document['category']
+                                jsonarticle['weight'] = float(document['weight'])
+                                reader = MediapartReader()
+                                tags = reader.get_tags(str(document['id']))
+                                jsonarticle['tags'] = []
+                                for tag in tags:
+                                    jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
+                                    jsonarticle['tags'].append(jsontag)
+                                author = self.get_author(document['id'])
+                                print document['id']
+                                jsonarticle['author'] = []
+                                jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+                                jsonarticle['author'].append(jsonauthor)
+                                
+                                json['documents'].append(jsonarticle)
+                                jsonarticle['annotations'] = []
+                                
+                                annotations = Annotationdocument.objects.all()
+                                for annotation in annotations:
+                                    #Take only the public annotations
+                                    if annotation.visibility == 1:
+                                        jsonannotation = {'id':annotation.id}
+                                        jsonannotation['user'] = annotation.user.username
+                                        # Test the scope of the annotation (a part of an article or the global article)
+                                        if annotation.annoted_text:
+                                            jsonannotation['annotated_text'] = annotation.annoted_text
+                                        jsonannotation['text'] = annotation.description
+                                 
+                                        jsonannotation['tags'] = []
+                                        tags = Tag.objects.filter(annotationdocument_id=annotation.id)
+            
+                                        for tag in tags:
+                                            jsontag = {'id': tag.value}
+                                            jsontag = {'title':str(tag.value)}
+                                            #TO DO URL ?
+                                            jsonannotation['tags'].append(jsontag)
+                                                
+                                        jsonarticle['annotations'].append(jsonannotation)
+            else:
+                json = '{"Error: Invalid query id"}'
+                return json
+        json['total_count'] = article_index
+        result = simplejson.dumps(json)
+        return result  
+    
+    def get_author(self, document_id):
+        reader = MediapartReader()
+        dico = reader.get_author(document_id)
+        return dico
+        
+    def get_contextual_data(self, query_id):
+        query_context = cache.get(query_id)
+        if not query_context:
+            print "Error: Invalid query id:"+query_id
+            logger.info("Error: Invalid query id:"+query_id)
+            weblab_data=None
+            query_context ={'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
+        else:
+            weblab_data = cache.get(query_context['weblab_data_key'])
+            
+        return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}
\ No newline at end of file