diff -r bad0e6c60b63 -r 3848e1813a30 alcatel/controller/Documents.py
--- a/alcatel/controller/Documents.py	Mon Jul 22 14:56:35 2013 +0200
+++ b/alcatel/controller/Documents.py	Wed Aug 14 16:36:41 2013 +0200
@@ -7,13 +7,14 @@
 import simplejson
 import locale
 from datetime import datetime
+import time
 from django.core.cache import cache
 
 from document.models import Annotationdocument
 from document.models import Tag, Cluster
 from mediapartdb.MediapartReader import MediapartReader
 from dataparser.ClientDocumentsGetAttributes import ClientDocumentsGetAttributes
-
+from document.models import Documentaryfile
 logger = logging.getLogger('document')
 
 # List of documents of a cluster with annotations
@@ -22,10 +23,17 @@
     def __init__(self, request):
         self.request = request
 
-    def get_documents(self,query,cluster,offset,count):
+    def get_documents(self,query,cluster,offset,count,docId):
+        logger.info('get_documents query'+str(query))
+        logger.info('get_documents cluster'+str(cluster))
+        logger.info('get_documents offset'+str(offset))
+        logger.info('get_documents docId'+str(docId))
+        logger.info('get_documents count'+str(count))
+
         json = {}
-        if query == 0:
+        '''if int(query) == 0 and int(docId) == 0:
+            logger.info('ENTER1')
 
             attr = ClientDocumentsGetAttributes(self.request)
 
             if not attr.get_cluster():
@@ -48,107 +56,121 @@
             cluster_id = int(attr.get_cluster())
             offset = int(attr.get_offset())
             count=int(attr.get_count())
+        elif int(query) == 0 and int(docId) != 0:
+            logger.info('ENTER2')
+            try:
+                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
+            except Documentaryfile.DoesNotExist:
+                logger.info('ERROR !!')
+                json = '{"Error": "Invalid documentary id"}'
+                logger.info(json)
+            logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))
+            #for thecluster in documentaryfile.cluster_set.all():
+
+
+
+
+
         else:
             json['cluster_id'] = int(cluster)
-            json['offset'] = int(offset)
-
-        query_id = int(query)
-        cluster_id = int(cluster)
-        offset = int(offset)
-        count=int(count)
+            json['offset'] = int(offset)'''
+        query_id = int(query)
+        cluster_id = int(cluster)
+        offset = int(offset)
+        count=int(count)
 
-        self.request.session['query'] = query_id
-        self.request.session['cluster'] = cluster_id
-        self.request.session['offset'] = offset
-        self.request.session['count'] = count
-
-        '''print self.request.session['json']
-        json_treemap = simplejson.loads(self.request.session['json'])
-        print json_treemap.query'''
-
-        d = simplejson.loads(self.request.session['json'])
-        print d
-        jsonquery = {'text': d['query']['text']}
-        jsonquery['categories'] = d['query']['categories']
-        jsonquery['from_date'] = d['query']['from_date']
-        jsonquery['to_date'] = d['query']['to_date']
-        json['query'] = jsonquery
-
+        logger.info(self.request.session['jsonTreemap'])
         json['documents'] = []
         article_index=0
-        #if no query_id it is a cluster saved in database
-        if not query_id:
-            #json = '{"error msg": "query_id is not defined"}'
+        #if docid != 0 it is a cluster saved in database
+        if int(query) == 0:
+            logger.info('docId != 0')
             try:
-                cluster = Cluster.objects.get(pk=cluster_id)
-            except Cluster.DoesNotExist:
-                json = '{"error": "Invalid cluster id"}'
+                documentaryfile = Documentaryfile.objects.get(pk=int(docId))
+            except Documentaryfile.DoesNotExist:
+                logger.info('ERROR !!')
+                json = '{"Error": "Invalid documentary id"}'
                 logger.info(json)
-                return json
-            json['cluster_title'] = cluster.title
+            logger.info('LONGUER !!'+str((documentaryfile.cluster_set.all())[int(cluster)].title))
+
+            my_jsontreemap = simplejson.loads(documentaryfile.jsontreemap)
+            jsonquery = {'text': my_jsontreemap['query']['text']}
+            jsonquery['categories'] = my_jsontreemap['query']['categories']
+            jsonquery['from_date'] = my_jsontreemap['query']['from_date']
+            jsonquery['to_date'] = my_jsontreemap['query']['to_date']
+            json['query'] = jsonquery
+            json['cluster_title'] = (documentaryfile.cluster_set.all())[int(cluster)].title
             reader = MediapartReader()
-            for thedocument in cluster.document.all():
+            for thedocument in (documentaryfile.cluster_set.all())[int(cluster)].document.all():
                 article_index += 1
-                jsonarticle = {'id':str(thedocument.documentId)}
-                jsonarticle['title'] = str(thedocument.title)
-                jsonarticle['abstract'] = str(thedocument.description)
-                jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
-                # TODO
-                jsonarticle['url_image'] = thedocument.image.url
-                '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z'''
-
-                jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
-
-
-                jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
+                if article_index - 1 >= offset and article_index - 1 < offset + count:
+                    jsonarticle = {'id':thedocument.documentId}
+                    jsonarticle['title'] = thedocument.title
+                    jsonarticle['abstract'] = thedocument.description
+                    jsonarticle['url_document'] = reader.get_url(str(thedocument.documentId))
+                    # TODO
+                    jsonarticle['url_image'] = thedocument.image.url
+                    '''jsonarticle['date'] = datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId)))).isoformat() + '.0Z'''
+
+                    jsonarticle['date'] =(datetime.fromtimestamp(int(reader.get_date(str(thedocument.documentId))))).strftime('%d-%m-%Y')
+                    jsonarticle['category'] = reader.get_category(str(thedocument.documentId))
 
-                clusterDoc = cluster.clusterdocumentweight_set.get(document=thedocument)
-                jsonarticle['weight'] = clusterDoc.weight
-                tags = reader.get_tags(str(thedocument.documentId))
-                jsonarticle['tags'] = []
-                #tags in mediapart
-                for tag in tags:
-                    jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
-                    jsonarticle['tags'].append(jsontag)
-
-                #tags in periplus
-                tags = thedocument.tag_set.all()
-                for tag in tags:
-                    jsontag = {'title':tag.value}
-                    jsonarticle['tags'].append(jsontag)
+                    clusterDoc = (documentaryfile.cluster_set.all())[int(cluster)].clusterdocumentweight_set.get(document=thedocument)
+                    jsonarticle['weight'] = clusterDoc.weight
+                    tags = reader.get_tags(str(thedocument.documentId))
+                    jsonarticle['tags'] = []
+                    #tags in mediapart
+                    for tag in tags:
+                        jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
+                        jsonarticle['tags'].append(jsontag)
+
+                    #tags in periplus
+                    tags = thedocument.tag_set.all()
+                    for tag in tags:
+                        jsontag = {'title':tag.value}
+                        jsonarticle['tags'].append(jsontag)
+
+                    author = self.get_author(str(thedocument.documentId))
+                    jsonarticle['author'] = []
+                    jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+                    jsonarticle['author'].append(jsonauthor)
 
-                author = self.get_author(str(thedocument.documentId))
-                jsonarticle['author'] = []
-                jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
-                jsonarticle['author'].append(jsonauthor)
-
-                json['documents'].append(jsonarticle)
-                jsonarticle['annotations'] = []
-
-                for theannotationdoc in thedocument.annotationdocument_set.all():
-                    #Take only the public annotations
-                    if theannotationdoc.visibility == 1:
-                        jsonannotation = {'id':theannotationdoc.id}
-                        jsonannotation['user'] = theannotationdoc.user.username
-                        # Test the scope of the annotation (a part of an article or the global article)
-                        if theannotationdoc.annoted_text:
-                            jsonannotation['annotated_text'] = theannotationdoc.annoted_text
-                        jsonannotation['text'] = theannotationdoc.description
-
-                        jsonannotation['tags'] = []
-                        for theannotationdoctag in theannotationdoc.tag_set.all():
-                            logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
-                            jsontag = {'id': theannotationdoctag.value}
-                            jsontag = {'title':str(theannotationdoctag.value)}
-                            #TO DO URL ?
-                            jsonannotation['tags'].append(jsontag)
-
-                        jsonarticle['annotations'].append(jsonannotation)
+                    json['documents'].append(jsonarticle)
+                    jsonarticle['annotations'] = []
+
+                    for theannotationdoc in thedocument.annotationdocument_set.all():
+                        #Take only the public annotations
+                        if theannotationdoc.visibility == 1:
+                            jsonannotation = {'id':theannotationdoc.id}
+                            jsonannotation['user'] = theannotationdoc.user.username
+                            # Test the scope of the annotation (a part of an article or the global article)
+                            if theannotationdoc.annoted_text:
+                                jsonannotation['annotated_text'] = theannotationdoc.annoted_text
+                            jsonannotation['text'] = theannotationdoc.description
+
+                            jsonannotation['tags'] = []
+                            for theannotationdoctag in theannotationdoc.tag_set.all():
+                                logger.info('DOCUMENT_TAG_VALUE_OF_ANNOTATION == '+str(theannotationdoctag.value))
+                                jsontag = {'id': theannotationdoctag.value}
+                                jsontag = {'title':str(theannotationdoctag.value)}
+                                #TO DO URL ?
+                                jsonannotation['tags'].append(jsontag)
+
+                            jsonarticle['annotations'].append(jsonannotation)
 
         #if query_id it is a cluster saved in cache
         else:
-            logger.info('query_id present')
+            logger.info('query_id present'+str(query_id))
+            d = simplejson.loads(self.request.session['jsonTreemap'])
+            logger.info(d)
+            jsonquery = {'text': d['query']['text']}
+            jsonquery['categories'] = d['query']['categories']
+            jsonquery['from_date'] = d['query']['from_date']
+            jsonquery['to_date'] = d['query']['to_date']
+            json['query'] = jsonquery
             dico = self.get_contextual_data(query_id)
+            logger.info('dico'+str(dico))
             if dico['weblab_data']:
                 list_concepts, concepts_with_detailed_documents_list = dico['weblab_data']
                 filtering = dico['filtering_params']
@@ -159,54 +181,77 @@
                     json = '{"error msg": "invalid cluster id"}'
                     return json
                 categories = filtering['categories']
-                print 'get_documents !!!!'
-                print categories
-                from_date = filtering['from_date']
-                print 'from_date'
-                print from_date
+                logger.info('get_documents !!!!')
+                logger.info(categories)
+
+                time_object1 = time.strptime(filtering['from_date'], '%m/%d/%Y')
+                from_date = str(int(time.mktime(time_object1)))
+                logger.info('get_documents 2!!!!'+str(from_date))
                 if from_date == '':
                     from_date = 0
-                to_date = filtering['to_date']
-                print 'to_date'
-                print to_date
+
+                time_object2 = time.strptime(filtering['to_date'], '%m/%d/%Y')
+                to_date = str(int(time.mktime(time_object2)))
+
                 if to_date == '':
                     to_date = 9999999999
                 json['cluster_title'] = list_concepts[cluster_id]['title']
                 for document in concepts_with_detailed_documents_list[cluster_id]:
                     #Filtering by category
+                    logger.info('categories) !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(categories)
+                    logger.info('document[category] !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(document['category']))
+                    logger.info('document[date] !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(document['date']))
+                    logger.info('to_date !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(to_date))
+                    logger.info('from_date !!!!!!!!!!!!!!!!!!!!!!!!')
+                    logger.info(str(from_date))
+
                     if (categories != [] and document['category'] in categories) or (categories == []):
                         #Filtering by date
                         if int(document['date']) >= int(from_date) and int(document['date']) < int(to_date):
+                            logger.info('ENTER')
                             article_index += 1
                             #Filtering by offset
                            if article_index - 1 >= offset and article_index - 1 < offset + count:
+                                logger.info('ENTER2')
                                 jsonarticle = {'id':document['id']}
                                 jsonarticle['title'] = document['title']
                                 jsonarticle['abstract'] = document['abstract']
                                 jsonarticle['url_document'] = document['url']
+                                logger.info('ENTER3')
                                 # TODO
                                 jsonarticle['url_image'] = document['image_path']
                                 # '''jsonarticle['date'] = datetime.fromtimestamp(int(document['date'])).isoformat() + '.0Z'''
                                 locale.setlocale(locale.LC_ALL,'')
-                                jsonarticle['date'] =(datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')
+                                jsonarticle['date'] = ((datetime.fromtimestamp(int(document['date']))).strftime('%d %B %Y')).decode("windows-1252").encode("utf8")
                                 jsonarticle['category'] = document['category']
                                 jsonarticle['weight'] = float(document['weight'])
                                 reader = MediapartReader()
                                 tags = reader.get_tags(str(document['id']))
+                                logger.info('ENTER4')
                                 jsonarticle['tags'] = []
+                                logger.info('ENTER5')
                                 for tag in tags:
+                                    logger.info('ENTER6')
                                     jsontag = {'title':tag[0].decode("windows-1252").encode("utf8")}
                                     jsonarticle['tags'].append(jsontag)
+                                logger.info('ENTER5')
                                 author = self.get_author(document['id'])
-                                print document['id']
+                                logger.info('ENTER5')
                                 jsonarticle['author'] = []
+                                logger.info('ENTER5')
                                 jsonauthor = {'id':author['id'], 'name':author['name'], 'url':'http://www.mediapart.fr/biographie/'+str(author['id'])}
+                                logger.info('ENTER5')
                                 jsonarticle['author'].append(jsonauthor)
-
+                                logger.info('ENTER5')
                                 json['documents'].append(jsonarticle)
                                 jsonarticle['annotations'] = []
-
+                                logger.info('jsonarticle')
                                 annotations = Annotationdocument.objects.all()
                                 for annotation in annotations:
                                     #Take only the public annotations
@@ -231,8 +276,12 @@
         else:
             json = '{"Error: Invalid query id"}'
             return json
+        logger.info('jsonarticle2')
         json['total_count'] = article_index
+        logger.info('jsondocument'+str(json))
         result = simplejson.dumps(json)
+        logger.info('result')
+        logger.info(result)
         return result
 
     def get_author(self, document_id):
@@ -242,12 +291,19 @@
 
     def get_contextual_data(self, query_id):
        query_context = cache.get(query_id)
+        logger.info('query_id ********** ='+str(query_context['filtering_params']))
         if not query_context:
-            print "Error: Invalid query id:"+query_id
+            logger.info("Error: Invalid query id:"+query_id)
             logger.info("Error: Invalid query id:"+query_id)
             weblab_data=None
             query_context ={'filtering_params':{'from_date':0, 'to_date':0, 'categories':[]}}
-        else:
+        '''else:
             weblab_data = cache.get(query_context['weblab_data_key'])
-
+            logger.info('query_context ********** ='+str(self.request.session.items()))
+            logger.info('query_context ********** ='+str(self.request.session['to_date']))
+            logger.info('query_context ********** ='+str(self.request.session['category']))
+            query_context ={'filtering_params':{'from_date':self.request.session['from_date'], 'to_date':self.request.session['to_date'], 'categories':self.request.session['category']}}'''
+        weblab_data = cache.get(query_context['weblab_data_key'])
+        logger.info('query_context ********** ='+str(cache.get(query_context['weblab_data_key'])))
+
         return {'weblab_data':weblab_data, 'filtering_params':query_context['filtering_params']}
\ No newline at end of file
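
Note on the date filtering introduced in this changeset: `filtering['from_date']` and `filtering['to_date']` are now parsed with `time.strptime` and converted to epoch seconds via `time.mktime` before being compared against `document['date']`. A minimal standalone sketch of that conversion (the `to_epoch` helper and its empty-string guard are illustrative, not part of the patch; the patch itself converts first and only afterwards tests for `''`, so an empty date string would raise ValueError inside `time.strptime` before the guard is reached):

    import time

    def to_epoch(date_str, default):
        # Hypothetical helper, not in the patch: guard against an empty
        # string *before* parsing, then convert an 'm/d/Y' date string
        # to epoch seconds, as the changeset does inline.
        if not date_str:
            return default
        return int(time.mktime(time.strptime(date_str, '%m/%d/%Y')))

    from_date = to_epoch('07/22/2013', 0)   # lower bound of the date filter
    to_date = to_epoch('', 9999999999)      # empty upper bound stays open-ended
    print from_date, to_date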
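A second note, on the `Documentaryfile` lookup: both copies of the `try/except Documentaryfile.DoesNotExist` block added above log the failure but do not return, so the very next `logger.info('LONGUER !!'...)` line dereferences an unbound `documentaryfile` when the id is invalid. A sketch of the early-return shape, assuming the project's own `Documentaryfile` model and `jsontreemap` field (helper names here are hypothetical):

    from document.models import Documentaryfile  # project model, as in the patch

    def load_documentaryfile(doc_id):
        # Hypothetical helper: return (model, error) so the caller can
        # bail out instead of falling through with an unbound name.
        try:
            return Documentaryfile.objects.get(pk=int(doc_id)), None
        except Documentaryfile.DoesNotExist:
            return None, '{"Error": "Invalid documentary id"}'

    def get_documents_fragment(docId):
        documentaryfile, error = load_documentaryfile(docId)
        if error:
            return error  # propagate the JSON error, never touch None
        return documentaryfile.jsontreemap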