src/hdalab/management/commands/query_geo_inclusion.py
changeset 271 8f77cf71ab02
parent 122 fde8335a037c
child 281 bc0f26b1acc2
equal deleted inserted replaced
265:73f19fa4f997 271:8f77cf71ab02
       
     1 # -*- coding: utf-8 -*-
       
     2 '''
       
     3 Created on Feb 22, 2012
       
     4 
       
     5 @author: raphv
       
     6 '''
       
     7 
       
     8 from django.core.management.base import NoArgsCommand
       
     9 from django.core.management.color import no_style
       
    10 from hdabo.utils import show_progress
       
    11 from hdabo.models import Tag
       
    12 from hdalab.models import Country, GeoInclusion
       
    13 from SPARQLWrapper import SPARQLWrapper, JSON
       
    14 import re
       
    15 
       
    16 class Command(NoArgsCommand):
       
    17     def handle_noargs(self, **options):
       
    18         self.style = no_style()
       
    19 
       
    20         qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri = None)
       
    21         total = qs.count()
       
    22         
       
    23         endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
       
    24         endpoint.setReturnFormat(JSON)
       
    25         sparqltext = """
       
    26             SELECT ?resource WHERE {
       
    27              { <%s> ?resource <http://dbpedia.org/ontology/Country> . }
       
    28             UNION
       
    29              { <%s> <http://dbpedia.org/ontology/country> ?resource . }
       
    30             }
       
    31         """
       
    32         resourceprefix = "http://dbpedia.org/resource/"
       
    33         identityuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"
       
    34         
       
    35         writer = None
       
    36         
       
    37         for i,tag in enumerate(qs):
       
    38             endpoint.setQuery(sparqltext % (tag.dbpedia_uri, tag.dbpedia_uri))
       
    39     
       
    40             results = endpoint.query().convert()['results']['bindings']
       
    41             
       
    42             if len(results) == 1: # We don't want places located in multiple countries
       
    43                 
       
    44                 resourceuri = results[0]['resource']['value']
       
    45                 
       
    46                 if re.match(resourceprefix, resourceuri):
       
    47                     countrytxt = re.findall('([^/]+$)', resourceuri)[0]
       
    48                     
       
    49                     country, created = Country.objects.get_or_create(dbpedia_uri=resourceuri)
       
    50                     GeoInclusion.objects.get_or_create(tag=tag, country=country)
       
    51                     
       
    52                 if resourceuri == identityuri:
       
    53                     countrytxt = '<is a country>'
       
    54                     
       
    55                     country, created = Country.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri)
       
    56                     GeoInclusion.objects.get_or_create(tag=tag, country=country)
       
    57                 
       
    58             else:
       
    59                 countrytxt = '<unknown>'
       
    60             
       
    61             writer = show_progress(i+1, total, '%s => %s'%(tag.label, countrytxt), 50, writer)