# -*- coding: utf-8 -*-
'''
Created on Feb 22, 2012

@author: raphv

Django management command (``query_geo_inclusion``).

For every ``Tag`` in the "Localisation" category that has a DBpedia URI, the
command queries the public DBpedia SPARQL endpoint and, when exactly one
binding comes back, records the result as a ``Country`` / ``GeoInclusion``
pair.
'''

import re

from django.core.management.base import NoArgsCommand
from django.core.management.color import no_style
from SPARQLWrapper import SPARQLWrapper, JSON

from hdabo.models import Tag
from hdabo.utils import show_progress
from hdalab.models import Country, GeoInclusion

SPARQL_ENDPOINT = "http://dbpedia.org/sparql"

# Both placeholders receive the tag's DBpedia URI.
# NOTE(review): each graph pattern below only has a subject and a single
# variable; the remaining term of each triple looks like it was lost from
# this copy of the file. Confirm the query against the repository before
# relying on it.
SPARQL_TEXT = """
    SELECT ?resource WHERE {
        { <%s> ?resource . }
        UNION
        { <%s> ?resource . }
    }
"""

# Bindings under this prefix are stored as the country of the tag.
RESOURCE_PREFIX = "http://dbpedia.org/resource/"

# When the single binding is this URI, the tag's own URI is stored as the
# country (presumably the tag is itself a country -- confirm with the query).
IDENTITY_URI = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

# Compiled once: trailing run of non-"/" characters of a URI.
_LAST_SEGMENT_RE = re.compile(r'[^/]+$')


def _resource_name(uri):
    '''Return the text after the last "/" of *uri*, or '' if *uri* ends with "/".'''
    found = _LAST_SEGMENT_RE.search(uri)
    return found.group(0) if found else ''


def _link_tag_to_country(tag, country_uri):
    '''Ensure a Country with *country_uri* exists and is linked to *tag*.'''
    country, _ = Country.objects.get_or_create(dbpedia_uri=country_uri)
    GeoInclusion.objects.get_or_create(tag=tag, country=country)


class Command(NoArgsCommand):
    '''Populate Country and GeoInclusion rows from DBpedia for location tags.'''

    def handle_noargs(self, **options):
        '''Query DBpedia once per "Localisation" tag and store the inclusion.

        A progress line of the form ``<tag label> => <country name>`` is
        reported through ``show_progress`` for every tag; the country name is
        empty whenever no DBpedia resource was recorded for that tag.
        '''
        self.style = no_style()

        qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri=None)
        total = qs.count()

        endpoint = SPARQLWrapper(SPARQL_ENDPOINT)
        endpoint.setReturnFormat(JSON)

        writer = None

        for position, tag in enumerate(qs, 1):
            endpoint.setQuery(SPARQL_TEXT % (tag.dbpedia_uri, tag.dbpedia_uri))
            results = endpoint.query().convert()['results']['bindings']

            # Reset for every tag: previously this name could be unbound on
            # the first iteration or carry over the previous tag's country.
            countrytxt = ''

            # We don't want places located in multiple countries
            if len(results) == 1:
                resourceuri = results[0]['resource']['value']

                if resourceuri.startswith(RESOURCE_PREFIX):
                    countrytxt = _resource_name(resourceuri)
                    _link_tag_to_country(tag, resourceuri)
                elif resourceuri == IDENTITY_URI:
                    _link_tag_to_country(tag, tag.dbpedia_uri)

            writer = show_progress(position, total, '%s => %s' % (tag.label, countrytxt), 50, writer)