alcatel/controller/Documents.py
changeset 37 3848e1813a30
parent 27 8ca7f2cea729
--- a/alcatel/controller/Documents.py	Mon Jul 22 14:56:35 2013 +0200
+++ b/alcatel/controller/Documents.py	Wed Aug 14 16:36:41 2013 +0200
@@ -7,13 +7,14 @@
 import simplejson
 import locale
 from datetime import datetime 
+import time
 
 from django.core.cache import cache
 from document.models import Annotationdocument
 from document.models import Tag, Cluster
 from mediapartdb.MediapartReader import MediapartReader
 from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
-
+from document.models import Documentaryfile
 logger = logging.getLogger('document')
 
 # List of documents of a cluster with annotations
@@ -22,10 +23,17 @@
     def __init__(self, request):
         self.request = request
     
-    def get_documents(self,query,cluster,offset,count):
+    def get_documents(self,query,cluster,offset,count,docId):
+        logger.info('get_documents query'+str(query))
+        logger.info('get_documents cluster'+str(cluster))
+        logger.info('get_documents offset'+str(offset))
+        logger.info('get_documents docId'+str(docId))
+        logger.info('get_documents count'+str(count))
+
         json = {}
         
-        if query == 0:
+        '''if int(query) == 0 and int(docId) == 0:
+            logger.info('ENTER1')
             attr = ClientDocumentsGetAttributes(self.request)
        
             if not attr.get_cluster():
@@ -48,107 +56,121 @@
             cluster_id = int(attr.get_cluster())
             offset = int(attr.get_offset())
             count=int(attr.get_count())
+        elif int(query) == 0 and int(docId) != 0:
+            logger.info('ENTER2')
+            try:
+                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
+            except Documentaryfile.DoesNotExist:
+                logger.info('ERROR !!')
+                json = '{"Error": "Invalid documentary id"}'
+                logger.info(json)
+            logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))
+            #for thecluster in documentaryfile.cluster_set.all():     
+                
+                
+                
+                
+                
+                
         else:
             json['cluster_id'] = int(cluster)
-            json['offset'] = int(offset)
-            
-            query_id = int(query)
-            cluster_id = int(cluster)
-            offset = int(offset)
-            count=int(count)
+            json['offset'] = int(offset)'''
+        query_id = int(query)
+        cluster_id = int(cluster)
+        offset = int(offset)
+        count=int(count)
         
-        self.request.session['query'] = query_id
-        self.request.session['cluster'] = cluster_id
-        self.request.session['offset'] = offset
-        self.request.session['count'] = count
-         
-        '''print self.request.session['json']      
-        json_treemap = simplejson.loads(self.request.session['json'])      
-        print json_treemap.query'''
-        
-        d = simplejson.loads(self.request.session['json'])
-        print d     
-        jsonquery = {'text': d['query']['text']}
-        jsonquery['categories'] = d['query']['categories']
-        jsonquery['from_date'] = d['query']['from_date']
-        jsonquery['to_date'] = d['query']['to_date']
-        json['query'] = jsonquery
-           
+        logger.info(self.request.session['jsonTreemap'])      
         json['documents'] = []
         article_index=0
-        #if no query_id it is a cluster saved in database
-        if not query_id:
-            #json = '{"error msg": "query_id is not defined"}'
+        #if query == 0, the cluster belongs to a documentary file saved in the database
+        if int(query) == 0:
+            logger.info('docId != 0')
             try:
-                cluster = Cluster.objects.get(pk=cluster_id)
-            except Cluster.DoesNotExist:
-                json = '{"error": "Invalid cluster id"}'
+                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
+            except Documentaryfile.DoesNotExist:
+                logger.info('ERROR !!')
+                json = '{"Error": "Invalid documentary id"}'
                 logger.info(json)
-                return json
-            json['cluster_title'] = cluster.title
+                logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))
+            
+            my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
+            jsonquery = {'text': my_jsontreemap['query']['text']}
+            jsonquery['categories'] = my_jsontreemap['query']['categories']
+            jsonquery['from_date'] = my_jsontreemap['query']['from_date']
+            jsonquery['to_date'] = my_jsontreemap['query']['to_date']
+            json['query'] = jsonquery
+            json['cluster_title'] = (documentaryfile.cluster_set.all())[int(cluster)].title
             reader = MediapartReader()
-            for thedocument in cluster.document.all():
+            for thedocument in (documentaryfile.cluster_set.all())[int(cluster)].document.all():
                 article_index += 1
-                jsonarticle = {'id':str(thedocument.documentId)}
-                jsonarticle['title'] = str(thedocument.title)
-                jsonarticle['abstract'] = str(thedocument.description)
-                jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
-                # TODO
-                jsonarticle['url_image'] = thedocument.image.url
-                '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' 
-                
-                jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
-
-
-                jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
+                if article_index - 1 >= offset and article_index - 1 < offset + count:
+                    jsonarticle = {'id':thedocument.documentId}
+                    jsonarticle['title'] = thedocument.title
+                    jsonarticle['abstract'] = thedocument.description
+                    jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
+                    # TODO
+                    jsonarticle['url_image'] = thedocument.image.url
+                    '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' 
+                    
+                    jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
+                    jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
                 
-                clusterDoc = cluster.clusterdocumentweight_set.get(document=thedocument)
-                jsonarticle['weight'] = clusterDoc.weight
-                tags = reader.get_tags(str(thedocument.documentId))
-                jsonarticle['tags'] = []
-                #tags in mediapart
-                for tag in tags:
-                    jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
-                    jsonarticle['tags'].append(jsontag)
-
-                #tags in periplus
-                tags = thedocument.tag_set.all()
-                for tag in tags:
-                    jsontag = {'title':tag.value}
-                    jsonarticle['tags'].append(jsontag)
+                    clusterDoc = (documentaryfile.cluster_set.all())[int(cluster)].clusterdocumentweight_set.get(document=thedocument)
+                    jsonarticle['weight'] = clusterDoc.weight
+                    tags = reader.get_tags(str(thedocument.documentId))
+                    jsonarticle['tags'] = []
+                    #tags in mediapart
+                    for tag in tags:
+                        jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
+                        jsonarticle['tags'].append(jsontag)
+        
+                    #tags in periplus
+                    tags = thedocument.tag_set.all()
+                    for tag in tags:
+                        jsontag = {'title':tag.value}
+                        jsonarticle['tags'].append(jsontag)
+                        
+                    author = self.get_author(str(thedocument.documentId))
+                    jsonarticle['author'] = []
+                    jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+                    jsonarticle['author'].append(jsonauthor)
                     
-                author = self.get_author(str(thedocument.documentId))
-                jsonarticle['author'] = []
-                jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
-                jsonarticle['author'].append(jsonauthor)
-                
-                json['documents'].append(jsonarticle)
-                jsonarticle['annotations'] = []
-                
-                for theannotationdoc in thedocument.annotationdocument_set.all():
-                    #Take only the public annotations
-                    if theannotationdoc.visibility == 1:
-                        jsonannotation = {'id':theannotationdoc.id}
-                        jsonannotation['user'] = theannotationdoc.user.username
-                        # Test the scope of the annotation (a part of an article or the global article)
-                        if theannotationdoc.annoted_text:
-                            jsonannotation['annotated_text'] = theannotationdoc.annoted_text
-                        jsonannotation['text'] = theannotationdoc.description
-                 
-                        jsonannotation['tags'] = []
-                        for theannotationdoctag in theannotationdoc.tag_set.all():
-                            logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
-                            jsontag = {'id': theannotationdoctag.value}
-                            jsontag = {'title':str(theannotationdoctag.value)}
-                            #TO DO URL ?
-                            jsonannotation['tags'].append(jsontag)
-                                
-                        jsonarticle['annotations'].append(jsonannotation)
+                    json['documents'].append(jsonarticle)
+                    jsonarticle['annotations'] = []
+                    
+                    for theannotationdoc in thedocument.annotationdocument_set.all():
+                        #Take only the public annotations
+                        if theannotationdoc.visibility == 1:
+                            jsonannotation = {'id':theannotationdoc.id}
+                            jsonannotation['user'] = theannotationdoc.user.username
+                            # Test the scope of the annotation (a part of an article or the global article)
+                            if theannotationdoc.annoted_text:
+                                jsonannotation['annotated_text'] = theannotationdoc.annoted_text
+                            jsonannotation['text'] = theannotationdoc.description
+                     
+                            jsonannotation['tags'] = []
+                            for theannotationdoctag in theannotationdoc.tag_set.all():
+                                logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
+                                jsontag = {'id': theannotationdoctag.value}
+                                jsontag = {'title':str(theannotationdoctag.value)}
+                                #TODO: add URL?
+                                jsonannotation['tags'].append(jsontag)
+                                    
+                            jsonarticle['annotations'].append(jsonannotation)
                                  
         #if query_id it is a cluster saved in cache
         else:
-            logger.info('query_id present')
+            logger.info('query_id present'+str(query_id))
+            d = simplejson.loads(self.request.session['jsonTreemap'])
+            logger.info(d)     
+            jsonquery = {'text': d['query']['text']}
+            jsonquery['categories'] = d['query']['categories']
+            jsonquery['from_date'] = d['query']['from_date']
+            jsonquery['to_date'] = d['query']['to_date']
+            json['query'] = jsonquery
             dico = self.get_contextual_data(query_id)
+            logger.info('dico'+str(dico))
             if dico['weblab_data']:
                 list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
                 filtering = dico['filtering_params']
@@ -159,54 +181,77 @@
                     json = '{"error msg": "invalid cluster id"}'
                     return json
                 categories = filtering['categories']
-                print 'get_documents !!!!'
-                print categories
-                from_date = filtering['from_date']
-                print 'from_date'
-                print from_date
+                logger.info('get_documents !!!!')
+                logger.info(categories)
+               
+                time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y')
+                from_date = str(int(time.mktime(time_object1)))
+                logger.info('get_documents 2!!!!'+str(from_date))
                 if from_date == '':
                     from_date = 0
-                to_date = filtering['to_date']
-                print 'to_date'
-                print to_date
+                
+                time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y')
+                to_date = str(int(time.mktime(time_object2)))
+                              
                 if to_date == '':
                     to_date = 9999999999
                 json['cluster_title'] = list_concepts[cluster_id]['title']
                 for document in concepts_with_detailed_documents_list[cluster_id]:
                     #Filtering by category
+                    logger.info('categories) !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(categories)
+                    logger.info('document[category] !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(document['category']))
+                    logger.info('document[date] !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(document['date']))
+                    logger.info('to_date !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(to_date))
+                    logger.info('from_date !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(from_date))
+                    
+                    
                     if (categories != [] and document['category'] in categories) or (categories == []):
                         #Filtering by date
                         if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
+                            logger.info('ENTER')
                             article_index += 1
                             #Filtering by offset
                             if article_index - 1 >= offset and article_index - 1 < offset + count:
+                                logger.info('ENTER2')
                                 jsonarticle = {'id':document['id']}
                                 jsonarticle['title'] = document['title']
                                 jsonarticle['abstract'] = document['abstract']
                                 jsonarticle['url_document'] = document['url']
+                                logger.info('ENTER3')
                                 # TODO
                                 jsonarticle['url_image'] = document['image_path']
                                 #
                                 '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z''' 
                                 locale.setlocale(locale.LC_ALL,'') 
-                                jsonarticle['date'] =(datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')
+                                jsonarticle['date'] = ((datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')).decode("windows-1252").encode("utf8")
                                 jsonarticle['category'] = document['category']
                                 jsonarticle['weight'] = float(document['weight'])
                                 reader = MediapartReader()
                                 tags = reader.get_tags(str(document['id']))
+                                logger.info('ENTER4')
                                 jsonarticle['tags'] = []
+                                logger.info('ENTER5')
                                 for tag in tags:
+                                    logger.info('ENTER6')
                                     jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
                                     jsonarticle['tags'].append(jsontag)
+                                logger.info('ENTER5')
                                 author = self.get_author(document['id'])
-                                print document['id']
+                                logger.info('ENTER5')
                                 jsonarticle['author'] = []
+                                logger.info('ENTER5')
                                 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+                                logger.info('ENTER5')
                                 jsonarticle['author'].append(jsonauthor)
-                                
+                                logger.info('ENTER5')
                                 json['documents'].append(jsonarticle)
                                 jsonarticle['annotations'] = []
-                                
+                                logger.info('jsonarticle')
                                 annotations = Annotationdocument.objects.all()
                                 for annotation in annotations:
                                     #Take only the public annotations
@@ -231,8 +276,12 @@
             else:
                 json = '{"Error: Invalid query id"}'
                 return json
+        logger.info('jsonarticle2')
         json['total_count'] = article_index
+        logger.info('jsondocument'+str(json))
         result = simplejson.dumps(json)
+        logger.info('result')
+        logger.info(result)
         return result  
     
     def get_author(self, document_id):
@@ -242,12 +291,19 @@
         
     def get_contextual_data(self, query_id):
         query_context = cache.get(query_id)
+        logger.info('query_id ********** ='+str(query_context['filtering_params']))
         if not query_context:
-            print "Error: Invalid query id:"+query_id
+            logger.info("Error: Invalid query id:"+query_id)
             logger.info("Error: Invalid query id:"+query_id)
             weblab_data=None
             query_context ={'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
-        else:
+        '''else:
             weblab_data = cache.get(query_context['weblab_data_key'])
-            
+            logger.info('query_context ********** ='+str(self.request.session.items()))
+            logger.info('query_context ********** ='+str(self.request.session['to_date']))
+            logger.info('query_context ********** ='+str(self.request.session['category']))
+            query_context ={'filtering_params':{'from_date':self.request.session['from_date'], 'to_date':self.request.session['to_date'], 'categories':self.request.session['category']}}'''
+        weblab_data = cache.get(query_context['weblab_data_key'])
+        logger.info('query_context ********** ='+str(cache.get(query_context['weblab_data_key'])))
+          
         return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}
\ No newline at end of file