alcatel/dataparser/WebLabParser.py
changeset 37 3848e1813a30
parent 27 8ca7f2cea729
equal deleted inserted replaced
36:bad0e6c60b63 37:3848e1813a30
    39                     mediaunit_elts = description.getElementsByTagName('mediaUnit')
    39                     mediaunit_elts = description.getElementsByTagName('mediaUnit')
    40                     for mediaunit in mediaunit_elts:
    40                     for mediaunit in mediaunit_elts:
    41                         has_native_content_elts = mediaunit.getElementsByTagName('wlr:hasNativeContent')
    41                         has_native_content_elts = mediaunit.getElementsByTagName('wlr:hasNativeContent')
    42                         for has_native_content in has_native_content_elts:
    42                         for has_native_content in has_native_content_elts:
    43                             a_url_image = has_native_content.childNodes[0].data
    43                             a_url_image = has_native_content.childNodes[0].data
    44                             print 'a_url_image'
       
    45                             print a_url_image
       
    46                             break
    44                             break
    47                         break
    45                         break
    48                         
    46                         
    49                     wls_score_elts = description.getElementsByTagName(self._ns + ':hasScore')
    47                     wls_score_elts = description.getElementsByTagName(self._ns + ':hasScore')
    50                     atitle = ''
    48                     atitle = ''
    51                     for title in dc_title_elts:
    49                     for title in dc_title_elts:
    52                         atitle = title.childNodes[0].data
    50                         atitle = title.childNodes[0].data
    53                     a_score = '0'
    51                     a_score = '0'
    54                     for wls_score in wls_score_elts:
    52                     for wls_score in wls_score_elts:
    55                         a_score = wls_score.childNodes[0].data
    53                         a_score = wls_score.childNodes[0].data
    56                         print 'a_score'
    54                         
    57                         print a_score
    55                         
    58                     if atitle != '':
    56                     if atitle != '':
    59                         has_description_elts = description.getElementsByTagName(self._ns + ':hasDescription')
    57                         has_description_elts = description.getElementsByTagName(self._ns + ':hasDescription')
    60                         cluster_abstract = ''
    58                         cluster_abstract = ''
    61                         for has_description in has_description_elts:
    59                         for has_description in has_description_elts:
    62                             cluster_abstract = has_description.childNodes[0].data
    60                             cluster_abstract = has_description.childNodes[0].data
    63                         concept_list.append({'about':about,'title': atitle, 'abstract':cluster_abstract, 'url_image':a_url_image, 'score':a_score})
    61                         concept_list.append({'about':about,'title': atitle, 'abstract':cluster_abstract, 'url_image':a_url_image, 'score':a_score})
    64         logger.info('concept_list')
       
    65         logger.info(concept_list)
       
    66         return concept_list
    62         return concept_list
    67                            
    63                            
    68     def get_documents_concepts_(self, dom):
    64     def get_documents_concepts_(self, dom):
    69         concepts_with_documents_list = []
    65         concepts_with_documents_list = []
    70         for node in dom.getElementsByTagName('resultSet'):
    66         for node in dom.getElementsByTagName('resultSet'):
    85                         mediaunit_elts = description.getElementsByTagName('mediaUnit')
    81                         mediaunit_elts = description.getElementsByTagName('mediaUnit')
    86                         for mediaunit in mediaunit_elts:
    82                         for mediaunit in mediaunit_elts:
    87                             has_native_content_elts = mediaunit.getElementsByTagName('wlr:hasNativeContent')
    83                             has_native_content_elts = mediaunit.getElementsByTagName('wlr:hasNativeContent')
    88                             for has_native_content in has_native_content_elts:
    84                             for has_native_content in has_native_content_elts:
    89                                 img_internal_path = has_native_content.childNodes[0].data
    85                                 img_internal_path = has_native_content.childNodes[0].data
    90                                 print 'img_internal_path'
       
    91                                 print img_internal_path
       
    92                                 break
    86                                 break
    93                             break
    87                             break
    94                     
    88                     
    95                         dc_identifier_elts = description.getElementsByTagName('dc:identifier')
    89                         dc_identifier_elts = description.getElementsByTagName('dc:identifier')
    96                         document_id = dc_identifier_elts[0].childNodes[0].data
    90                         document_id = dc_identifier_elts[0].childNodes[0].data
   118                                         img_internal_path = has_exposed_content_elts[0].childNodes[0].data'''
   112                                         img_internal_path = has_exposed_content_elts[0].childNodes[0].data'''
   119 
   113 
   120                         concept_documents_list.append({'id':document_id, 'score':score, 'rank':rank, 'isLinkedTo':isLinkedTo, 'image_path':img_internal_path, 'abstract':abstract})  
   114                         concept_documents_list.append({'id':document_id, 'score':score, 'rank':rank, 'isLinkedTo':isLinkedTo, 'image_path':img_internal_path, 'abstract':abstract})  
   121 
   115 
   122                 concepts_with_documents_list.append(concept_documents_list)
   116                 concepts_with_documents_list.append(concept_documents_list)
   123         logger.info('concepts_with_documents_list')
       
   124         logger.info(concepts_with_documents_list)
       
   125         return concepts_with_documents_list
   117         return concepts_with_documents_list
   126             
   118             
   127     def get_document_ids_concepts_(self, dom, list_links_concepts):
   119     def get_document_ids_concepts_(self, dom, list_links_concepts):
   128         document_ids_concepts_list = [] 
   120         document_ids_concepts_list = [] 
   129         for list_links_of_a_concept in list_links_concepts:
   121         for list_links_of_a_concept in list_links_concepts: