alcatel/controller/Documents.py
changeset 37 3848e1813a30
parent 27 8ca7f2cea729
equal deleted inserted replaced
36:bad0e6c60b63 37:3848e1813a30
     5 '''
     5 '''
     6 import logging
     6 import logging
     7 import simplejson
     7 import simplejson
     8 import locale
     8 import locale
     9 from datetime import datetime 
     9 from datetime import datetime 
       
    10 import time
    10 
    11 
    11 from django.core.cache import cache
    12 from django.core.cache import cache
    12 from document.models import Annotationdocument
    13 from document.models import Annotationdocument
    13 from document.models import Tag, Cluster
    14 from document.models import Tag, Cluster
    14 from mediapartdb.MediapartReader import MediapartReader
    15 from mediapartdb.MediapartReader import MediapartReader
    15 from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
    16 from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
    16 
    17 from document.models import Documentaryfile
    17 logger = logging.getLogger('document')
    18 logger = logging.getLogger('document')
    18 
    19 
    19 # List of documents of a cluster with annotations
    20 # List of documents of a cluster with annotations
    20 class Documents(object):
    21 class Documents(object):
    21 
    22 
    22     def __init__(self, request):
    23     def __init__(self, request):
    23         self.request = request
    24         self.request = request
    24     
    25     
    25     def get_documents(self,query,cluster,offset,count):
    26     def get_documents(self,query,cluster,offset,count,docId):
       
    27         logger.info('get_documents query'+str(query))
       
    28         logger.info('get_documents cluster'+str(cluster))
       
    29         logger.info('get_documents offset'+str(offset))
       
    30         logger.info('get_documents docId'+str(docId))
       
    31         logger.info('get_documents count'+str(count))
       
    32 
    26         json = {}
    33         json = {}
    27         
    34         
    28         if query == 0:
    35         '''if int(query) == 0 and int(docId) == 0:
       
    36             logger.info('ENTER1')
    29             attr = ClientDocumentsGetAttributes(self.request)
    37             attr = ClientDocumentsGetAttributes(self.request)
    30        
    38        
    31             if not attr.get_cluster():
    39             if not attr.get_cluster():
    32                 json = '{"error msg": "no cluster_id defined"}'
    40                 json = '{"error msg": "no cluster_id defined"}'
    33                 return json
    41                 return json
    46             
    54             
    47             query_id = int(attr.get_query_id())
    55             query_id = int(attr.get_query_id())
    48             cluster_id = int(attr.get_cluster())
    56             cluster_id = int(attr.get_cluster())
    49             offset = int(attr.get_offset())
    57             offset = int(attr.get_offset())
    50             count=int(attr.get_count())
    58             count=int(attr.get_count())
       
    59         elif int(query) == 0 and int(docId) != 0:
       
    60             logger.info('ENTER2')
       
    61             try:
       
    62                 documentaryfile = Documentaryfile.objects.get(pk=int(docId))
       
    63             except Documentaryfile.DoesNotExist:
       
    64                 logger.info('ERROR !!')
       
    65                 json = '{"Error": "Invalid documentary id"}'
       
    66                 logger.info(json)
       
    67             logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))
       
    68             #for thecluster in documentaryfile.cluster_set.all():     
       
    69                 
       
    70                 
       
    71                 
       
    72                 
       
    73                 
       
    74                 
    51         else:
    75         else:
    52             json['cluster_id'] = int(cluster)
    76             json['cluster_id'] = int(cluster)
    53             json['offset'] = int(offset)
    77             json['offset'] = int(offset)'''
    54             
    78         query_id = int(query)
    55             query_id = int(query)
    79         cluster_id = int(cluster)
    56             cluster_id = int(cluster)
    80         offset = int(offset)
    57             offset = int(offset)
    81         count=int(count)
    58             count=int(count)
       
    59         
    82         
    60         self.request.session['query'] = query_id
    83         logger.info(self.request.session['jsonTreemap'])      
    61         self.request.session['cluster'] = cluster_id
       
    62         self.request.session['offset'] = offset
       
    63         self.request.session['count'] = count
       
    64          
       
    65         '''print self.request.session['json']      
       
    66         json_treemap = simplejson.loads(self.request.session['json'])      
       
    67         print json_treemap.query'''
       
    68         
       
    69         d = simplejson.loads(self.request.session['json'])
       
    70         print d     
       
    71         jsonquery = {'text': d['query']['text']}
       
    72         jsonquery['categories'] = d['query']['categories']
       
    73         jsonquery['from_date'] = d['query']['from_date']
       
    74         jsonquery['to_date'] = d['query']['to_date']
       
    75         json['query'] = jsonquery
       
    76            
       
    77         json['documents'] = []
    84         json['documents'] = []
    78         article_index=0
    85         article_index=0
    79         #if no query_id it is a cluster saved in database
    86         #if docid != 0 it is a cluster saved in database
    80         if not query_id:
    87         if int(query) == 0:
    81             #json = '{"error msg": "query_id is not defined"}'
    88             logger.info('docId != 0')
    82             try:
    89             try:
    83                 cluster = Cluster.objects.get(pk=cluster_id)
    90                 documentaryfile = Documentaryfile.objects.get(pk=int(docId))
    84             except Cluster.DoesNotExist:
    91             except Documentaryfile.DoesNotExist:
    85                 json = '{"error": "Invalid cluster id"}'
    92                 logger.info('ERROR !!')
       
    93                 json = '{"Error": "Invalid documentary id"}'
    86                 logger.info(json)
    94                 logger.info(json)
    87                 return json
    95                 logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))
    88             json['cluster_title'] = cluster.title
    96             
       
    97             my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
       
    98             jsonquery = {'text': my_jsontreemap['query']['text']}
       
    99             jsonquery['categories'] = my_jsontreemap['query']['categories']
       
   100             jsonquery['from_date'] = my_jsontreemap['query']['from_date']
       
   101             jsonquery['to_date'] = my_jsontreemap['query']['to_date']
       
   102             json['query'] = jsonquery
       
   103             json['cluster_title'] = (documentaryfile.cluster_set.all())[int(cluster)].title
    89             reader = MediapartReader()
   104             reader = MediapartReader()
    90             for thedocument in cluster.document.all():
   105             for thedocument in (documentaryfile.cluster_set.all())[int(cluster)].document.all():
    91                 article_index += 1
   106                 article_index += 1
    92                 jsonarticle = {'id':str(thedocument.documentId)}
   107                 if article_index - 1 >= offset and article_index - 1 < offset + count:
    93                 jsonarticle['title'] = str(thedocument.title)
   108                     jsonarticle = {'id':thedocument.documentId}
    94                 jsonarticle['abstract'] = str(thedocument.description)
   109                     jsonarticle['title'] = thedocument.title
    95                 jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
   110                     jsonarticle['abstract'] = thedocument.description
    96                 # TODO
   111                     jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
    97                 jsonarticle['url_image'] = thedocument.image.url
   112                     # TODO
    98                 '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' 
   113                     jsonarticle['url_image'] = thedocument.image.url
    99                 
   114                     '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z''' 
   100                 jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
   115                     
   101 
   116                     jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
   102 
   117                     jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
   103                 jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
   118                 
   104                 
   119                     clusterDoc = (documentaryfile.cluster_set.all())[int(cluster)].clusterdocumentweight_set.get(document=thedocument)
   105                 clusterDoc = cluster.clusterdocumentweight_set.get(document=thedocument)
   120                     jsonarticle['weight'] = clusterDoc.weight
   106                 jsonarticle['weight'] = clusterDoc.weight
   121                     tags = reader.get_tags(str(thedocument.documentId))
   107                 tags = reader.get_tags(str(thedocument.documentId))
   122                     jsonarticle['tags'] = []
   108                 jsonarticle['tags'] = []
   123                     #tags in mediapart
   109                 #tags in mediapart
   124                     for tag in tags:
   110                 for tag in tags:
   125                         jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
   111                     jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
   126                         jsonarticle['tags'].append(jsontag)
   112                     jsonarticle['tags'].append(jsontag)
   127         
   113 
   128                     #tags in periplus
   114                 #tags in periplus
   129                     tags = thedocument.tag_set.all()
   115                 tags = thedocument.tag_set.all()
   130                     for tag in tags:
   116                 for tag in tags:
   131                         jsontag = {'title':tag.value}
   117                     jsontag = {'title':tag.value}
   132                         jsonarticle['tags'].append(jsontag)
   118                     jsonarticle['tags'].append(jsontag)
   133                         
   119                     
   134                     author = self.get_author(str(thedocument.documentId))
   120                 author = self.get_author(str(thedocument.documentId))
   135                     jsonarticle['author'] = []
   121                 jsonarticle['author'] = []
   136                     jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
   122                 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
   137                     jsonarticle['author'].append(jsonauthor)
   123                 jsonarticle['author'].append(jsonauthor)
   138                     
   124                 
   139                     json['documents'].append(jsonarticle)
   125                 json['documents'].append(jsonarticle)
   140                     jsonarticle['annotations'] = []
   126                 jsonarticle['annotations'] = []
   141                     
   127                 
   142                     for theannotationdoc in thedocument.annotationdocument_set.all():
   128                 for theannotationdoc in thedocument.annotationdocument_set.all():
   143                         #Take only the public annotations
   129                     #Take only the public annotations
   144                         if theannotationdoc.visibility == 1:
   130                     if theannotationdoc.visibility == 1:
   145                             jsonannotation = {'id':theannotationdoc.id}
   131                         jsonannotation = {'id':theannotationdoc.id}
   146                             jsonannotation['user'] = theannotationdoc.user.username
   132                         jsonannotation['user'] = theannotationdoc.user.username
   147                             # Test the scope of the annotation (a part of an article or the global article)
   133                         # Test the scope of the annotation (a part of an article or the global article)
   148                             if theannotationdoc.annoted_text:
   134                         if theannotationdoc.annoted_text:
   149                                 jsonannotation['annotated_text'] = theannotationdoc.annoted_text
   135                             jsonannotation['annotated_text'] = theannotationdoc.annoted_text
   150                             jsonannotation['text'] = theannotationdoc.description
   136                         jsonannotation['text'] = theannotationdoc.description
   151                      
   137                  
   152                             jsonannotation['tags'] = []
   138                         jsonannotation['tags'] = []
   153                             for theannotationdoctag in theannotationdoc.tag_set.all():
   139                         for theannotationdoctag in theannotationdoc.tag_set.all():
   154                                 logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
   140                             logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
   155                                 jsontag = {'id': theannotationdoctag.value}
   141                             jsontag = {'id': theannotationdoctag.value}
   156                                 jsontag = {'title':str(theannotationdoctag.value)}
   142                             jsontag = {'title':str(theannotationdoctag.value)}
   157                                 #TO DO URL ?
   143                             #TO DO URL ?
   158                                 jsonannotation['tags'].append(jsontag)
   144                             jsonannotation['tags'].append(jsontag)
   159                                     
   145                                 
   160                             jsonarticle['annotations'].append(jsonannotation)
   146                         jsonarticle['annotations'].append(jsonannotation)
       
   147                                  
   161                                  
   148         #if query_id it is a cluster saved in cache
   162         #if query_id it is a cluster saved in cache
   149         else:
   163         else:
   150             logger.info('query_id present')
   164             logger.info('query_id present'+str(query_id))
       
   165             d = simplejson.loads(self.request.session['jsonTreemap'])
       
   166             logger.info(d)     
       
   167             jsonquery = {'text': d['query']['text']}
       
   168             jsonquery['categories'] = d['query']['categories']
       
   169             jsonquery['from_date'] = d['query']['from_date']
       
   170             jsonquery['to_date'] = d['query']['to_date']
       
   171             json['query'] = jsonquery
   151             dico = self.get_contextual_data(query_id)
   172             dico = self.get_contextual_data(query_id)
       
   173             logger.info('dico'+str(dico))
   152             if dico['weblab_data']:
   174             if dico['weblab_data']:
   153                 list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
   175                 list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
   154                 filtering = dico['filtering_params']
   176                 filtering = dico['filtering_params']
   155                 if not list_concepts:
   177                 if not list_concepts:
   156                     json = '{"error msg": "no data for the query id"}'
   178                     json = '{"error msg": "no data for the query id"}'
   157                     return json
   179                     return json
   158                 if int(cluster_id) >= len(list_concepts):
   180                 if int(cluster_id) >= len(list_concepts):
   159                     json = '{"error msg": "invalid cluster id"}'
   181                     json = '{"error msg": "invalid cluster id"}'
   160                     return json
   182                     return json
   161                 categories = filtering['categories']
   183                 categories = filtering['categories']
   162                 print 'get_documents !!!!'
   184                 logger.info('get_documents !!!!')
   163                 print categories
   185                 logger.info(categories)
   164                 from_date = filtering['from_date']
   186                
   165                 print 'from_date'
   187                 time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y')
   166                 print from_date
   188                 from_date = str(int(time.mktime(time_object1)))
       
   189                 logger.info('get_documents 2!!!!'+str(from_date))
   167                 if from_date == '':
   190                 if from_date == '':
   168                     from_date = 0
   191                     from_date = 0
   169                 to_date = filtering['to_date']
   192                 
   170                 print 'to_date'
   193                 time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y')
   171                 print to_date
   194                 to_date = str(int(time.mktime(time_object2)))
       
   195                               
   172                 if to_date == '':
   196                 if to_date == '':
   173                     to_date = 9999999999
   197                     to_date = 9999999999
   174                 json['cluster_title'] = list_concepts[cluster_id]['title']
   198                 json['cluster_title'] = list_concepts[cluster_id]['title']
   175                 for document in concepts_with_detailed_documents_list[cluster_id]:
   199                 for document in concepts_with_detailed_documents_list[cluster_id]:
   176                     #Filtering by category
   200                     #Filtering by category
       
   201                     logger.info('categories) !!!!!!!!!!!!!!!!!!!!!!!!')
       
   202                     logger.info(categories)
       
   203                     logger.info('document[category] !!!!!!!!!!!!!!!!!!!!!!!!')
       
   204                     logger.info(str(document['category']))
       
   205                     logger.info('document[date] !!!!!!!!!!!!!!!!!!!!!!!!')
       
   206                     logger.info(str(document['date']))
       
   207                     logger.info('to_date !!!!!!!!!!!!!!!!!!!!!!!!')
       
   208                     logger.info(str(to_date))
       
   209                     logger.info('from_date !!!!!!!!!!!!!!!!!!!!!!!!')
       
   210                     logger.info(str(from_date))
       
   211                     
       
   212                     
   177                     if (categories != [] and document['category'] in categories) or (categories == []):
   213                     if (categories != [] and document['category'] in categories) or (categories == []):
   178                         #Filtering by date
   214                         #Filtering by date
   179                         if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
   215                         if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
       
   216                             logger.info('ENTER')
   180                             article_index += 1
   217                             article_index += 1
   181                             #Filtering by offset
   218                             #Filtering by offset
   182                             if article_index - 1 >= offset and article_index - 1 < offset + count:
   219                             if article_index - 1 >= offset and article_index - 1 < offset + count:
       
   220                                 logger.info('ENTER2')
   183                                 jsonarticle = {'id':document['id']}
   221                                 jsonarticle = {'id':document['id']}
   184                                 jsonarticle['title'] = document['title']
   222                                 jsonarticle['title'] = document['title']
   185                                 jsonarticle['abstract'] = document['abstract']
   223                                 jsonarticle['abstract'] = document['abstract']
   186                                 jsonarticle['url_document'] = document['url']
   224                                 jsonarticle['url_document'] = document['url']
       
   225                                 logger.info('ENTER3')
   187                                 # TODO
   226                                 # TODO
   188                                 jsonarticle['url_image'] = document['image_path']
   227                                 jsonarticle['url_image'] = document['image_path']
   189                                 #
   228                                 #
   190                                 '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z''' 
   229                                 '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z''' 
   191                                 locale.setlocale(locale.LC_ALL,'') 
   230                                 locale.setlocale(locale.LC_ALL,'') 
   192                                 jsonarticle['date'] =(datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')
   231                                 jsonarticle['date'] = ((datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')).decode("windows-1252").encode("utf8")
   193                                 jsonarticle['category'] = document['category']
   232                                 jsonarticle['category'] = document['category']
   194                                 jsonarticle['weight'] = float(document['weight'])
   233                                 jsonarticle['weight'] = float(document['weight'])
   195                                 reader = MediapartReader()
   234                                 reader = MediapartReader()
   196                                 tags = reader.get_tags(str(document['id']))
   235                                 tags = reader.get_tags(str(document['id']))
       
   236                                 logger.info('ENTER4')
   197                                 jsonarticle['tags'] = []
   237                                 jsonarticle['tags'] = []
       
   238                                 logger.info('ENTER5')
   198                                 for tag in tags:
   239                                 for tag in tags:
       
   240                                     logger.info('ENTER6')
   199                                     jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
   241                                     jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
   200                                     jsonarticle['tags'].append(jsontag)
   242                                     jsonarticle['tags'].append(jsontag)
       
   243                                 logger.info('ENTER5')
   201                                 author = self.get_author(document['id'])
   244                                 author = self.get_author(document['id'])
   202                                 print document['id']
   245                                 logger.info('ENTER5')
   203                                 jsonarticle['author'] = []
   246                                 jsonarticle['author'] = []
       
   247                                 logger.info('ENTER5')
   204                                 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
   248                                 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
       
   249                                 logger.info('ENTER5')
   205                                 jsonarticle['author'].append(jsonauthor)
   250                                 jsonarticle['author'].append(jsonauthor)
   206                                 
   251                                 logger.info('ENTER5')
   207                                 json['documents'].append(jsonarticle)
   252                                 json['documents'].append(jsonarticle)
   208                                 jsonarticle['annotations'] = []
   253                                 jsonarticle['annotations'] = []
   209                                 
   254                                 logger.info('jsonarticle')
   210                                 annotations = Annotationdocument.objects.all()
   255                                 annotations = Annotationdocument.objects.all()
   211                                 for annotation in annotations:
   256                                 for annotation in annotations:
   212                                     #Take only the public annotations
   257                                     #Take only the public annotations
   213                                     if annotation.visibility == 1:
   258                                     if annotation.visibility == 1:
   214                                         jsonannotation = {'id':annotation.id}
   259                                         jsonannotation = {'id':annotation.id}
   229                                                 
   274                                                 
   230                                         jsonarticle['annotations'].append(jsonannotation)
   275                                         jsonarticle['annotations'].append(jsonannotation)
   231             else:
   276             else:
   232                 json = '{"Error: Invalid query id"}'
   277                 json = '{"Error: Invalid query id"}'
   233                 return json
   278                 return json
       
   279         logger.info('jsonarticle2')
   234         json['total_count'] = article_index
   280         json['total_count'] = article_index
       
   281         logger.info('jsondocument'+str(json))
   235         result = simplejson.dumps(json)
   282         result = simplejson.dumps(json)
       
   283         logger.info('result')
       
   284         logger.info(result)
   236         return result  
   285         return result  
   237     
   286     
   238     def get_author(self, document_id):
   287     def get_author(self, document_id):
   239         reader = MediapartReader()
   288         reader = MediapartReader()
   240         dico = reader.get_author(document_id)
   289         dico = reader.get_author(document_id)
   241         return dico
   290         return dico
   242         
   291         
   243     def get_contextual_data(self, query_id):
   292     def get_contextual_data(self, query_id):
   244         query_context = cache.get(query_id)
   293         query_context = cache.get(query_id)
       
   294         logger.info('query_id ********** ='+str(query_context['filtering_params']))
   245         if not query_context:
   295         if not query_context:
   246             print "Error: Invalid query id:"+query_id
   296             logger.info("Error: Invalid query id:"+query_id)
   247             logger.info("Error: Invalid query id:"+query_id)
   297             logger.info("Error: Invalid query id:"+query_id)
   248             weblab_data=None
   298             weblab_data=None
   249             query_context ={'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
   299             query_context ={'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
   250         else:
   300         '''else:
   251             weblab_data = cache.get(query_context['weblab_data_key'])
   301             weblab_data = cache.get(query_context['weblab_data_key'])
   252             
   302             logger.info('query_context ********** ='+str(self.request.session.items()))
       
   303             logger.info('query_context ********** ='+str(self.request.session['to_date']))
       
   304             logger.info('query_context ********** ='+str(self.request.session['category']))
       
   305             query_context ={'filtering_params':{'from_date':self.request.session['from_date'], 'to_date':self.request.session['to_date'], 'categories':self.request.session['category']}}'''
       
   306         weblab_data = cache.get(query_context['weblab_data_key'])
       
   307         logger.info('query_context ********** ='+str(cache.get(query_context['weblab_data_key'])))
       
   308           
   253         return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}
   309         return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}