166 |
166 |
167 return term, wikipedia_revision_id, created |
167 return term, wikipedia_revision_id, created |
168 |
168 |
169 |
169 |
170 def process_term(site, term, lang, label=None, verbosity=0): |
170 def process_term(site, term, lang, label=None, verbosity=0): |
171 |
171 |
|
172 label_is_url = False |
|
173 fragment = "" |
172 if not label: |
174 if not label: |
173 label = term.label |
175 label = term.label |
174 else: |
176 else: |
175 for lang_code, urls in settings.WIKIPEDIA_URLS.iteritems(): |
177 for lang_code, urls in settings.WIKIPEDIA_URLS.iteritems(): |
176 if label.startswith(urls['page_url']): |
178 if label.startswith(urls['page_url']): |
177 # lang is overridden when a URL is passed as a label. |
179 # lang is overridden when a URL is passed as a label. |
178 lang = lang_code |
180 lang = lang_code |
179 url_parts = urlparse(label) |
181 url_parts = urlparse(label) |
180 label = urllib2.unquote(str(url_parts.path.split('/')[-1])).decode("utf-8") |
182 label = urllib2.unquote(str(url_parts.path.split('/')[-1])).decode("utf-8") |
|
183 if url_parts.fragment: |
|
184 label_is_url = True |
|
185 fragment = url_parts.fragment |
181 break |
186 break |
182 |
187 |
183 if site == None: |
188 if site == None: |
184 site = __get_site(lang) |
189 site = __get_site(lang) |
185 |
190 |
186 wp_res = query_wikipedia_title(site, lang, label=label) |
191 wp_res = query_wikipedia_title(site, lang, label=label) |
187 new_label = wp_res['new_label'] |
192 new_label = wp_res['new_label'] |
188 alternative_label= wp_res['alternative_label'] |
193 alternative_label= wp_res['alternative_label'] |
189 status = wp_res['status'] |
194 status = wp_res['status'] |
190 url = wp_res['wikipedia_url'] |
195 url = wp_res['wikipedia_url'] + ("#"+fragment if label_is_url else "") |
191 alternative_url = wp_res['alternative_wikipedia_url'] |
196 alternative_url = wp_res['alternative_wikipedia_url'] |
192 pageid = wp_res['pageid'] |
197 pageid = wp_res['pageid'] |
193 alternative_pageid = wp_res['alternative_pageid'] |
198 alternative_pageid = wp_res['alternative_pageid'] |
194 response = wp_res['response'] |
199 response = wp_res['response'] |
195 dbpedia_uri = wp_res["dbpedia_uri"] |
200 dbpedia_uri = wp_res["dbpedia_uri"] |