src/core/wp_utils.py
changeset 71 3fde7d26ad08
parent 67 5d9223bb3aab
child 80 3851909cb730
equal deleted inserted replaced
70:1add6bd2794b 71:3fde7d26ad08
   166 
   166 
   167     return term, wikipedia_revision_id, created
   167     return term, wikipedia_revision_id, created
   168 
   168 
   169 
   169 
   170 def process_term(site, term, lang, label=None, verbosity=0):
   170 def process_term(site, term, lang, label=None, verbosity=0):
   171             
   171     
       
   172     label_is_url = False
       
   173     fragment = ""
   172     if not label:
   174     if not label:
   173         label = term.label
   175         label = term.label
   174     else:
   176     else:
   175         for lang_code, urls in settings.WIKIPEDIA_URLS.iteritems():
   177         for lang_code, urls in settings.WIKIPEDIA_URLS.iteritems():
   176             if label.startswith(urls['page_url']):
   178             if label.startswith(urls['page_url']):
   177                 # lang is overridden when a URL is passed as a label.
   179                 # lang is overridden when a URL is passed as a label.
   178                 lang = lang_code
   180                 lang = lang_code
   179                 url_parts = urlparse(label)
   181                 url_parts = urlparse(label)
   180                 label = urllib2.unquote(str(url_parts.path.split('/')[-1])).decode("utf-8")
   182                 label = urllib2.unquote(str(url_parts.path.split('/')[-1])).decode("utf-8")
       
   183                 if url_parts.fragment:
       
   184                     label_is_url = True
       
   185                     fragment = url_parts.fragment
   181                 break
   186                 break
   182 
   187 
   183     if site == None:
   188     if site == None:
   184         site = __get_site(lang)
   189         site = __get_site(lang)
   185 
   190 
   186     wp_res = query_wikipedia_title(site, lang, label=label)
   191     wp_res = query_wikipedia_title(site, lang, label=label)
   187     new_label = wp_res['new_label']
   192     new_label = wp_res['new_label']
   188     alternative_label= wp_res['alternative_label']
   193     alternative_label= wp_res['alternative_label']
   189     status =  wp_res['status']
   194     status =  wp_res['status']
   190     url = wp_res['wikipedia_url']
   195     url = wp_res['wikipedia_url'] + ("#"+fragment if label_is_url else "")    
   191     alternative_url = wp_res['alternative_wikipedia_url']
   196     alternative_url = wp_res['alternative_wikipedia_url']
   192     pageid = wp_res['pageid']
   197     pageid = wp_res['pageid']
   193     alternative_pageid = wp_res['alternative_pageid']
   198     alternative_pageid = wp_res['alternative_pageid']
   194     response = wp_res['response']
   199     response = wp_res['response']
   195     dbpedia_uri =  wp_res["dbpedia_uri"]
   200     dbpedia_uri =  wp_res["dbpedia_uri"]