|
1 # -*- coding: utf-8 -*- |
|
2 ''' |
|
3 Created on Feb 22, 2012 |
|
4 |
|
5 @author: raphv |
|
6 ''' |
|
7 |
|
8 from django.core.management.base import NoArgsCommand |
|
9 from django.core.management.color import no_style |
|
10 from hdabo.utils import show_progress |
|
11 from hdabo.models import Tag |
|
12 from hdalab.models import Country, GeoInclusion |
|
13 from SPARQLWrapper import SPARQLWrapper, JSON |
|
14 import re |
|
15 |
|
class Command(NoArgsCommand):
    """Management command linking "Localisation" tags to a country via DBpedia.

    For every ``Tag`` in the "Localisation" category that has a DBpedia URI,
    the public DBpedia SPARQL endpoint is queried and, when exactly one
    answer comes back, a ``Country`` and a ``GeoInclusion`` row are created
    (or reused) for that tag.
    """

    def handle_noargs(self, **options):
        """Query DBpedia for each localisation tag and store its country.

        One SPARQL request is sent per tag. Progress is reported through
        ``show_progress`` as ``<tag label> => <country>``, where the country
        part is the last path segment of the country URI, ``<is a country>``
        when the tag itself is typed as a country, or ``<unknown>`` when no
        usable single answer was returned.

        Exceptions raised by the SPARQL endpoint or the database are not
        caught here and propagate to the caller.
        """
        self.style = no_style()

        qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri=None)
        total = qs.count()

        endpoint = SPARQLWrapper("http://dbpedia.org/sparql")
        endpoint.setReturnFormat(JSON)
        # First branch: ?resource is the predicate relating the tag to the
        # dbo:Country class (rdf:type when the tag *is* a country).
        # Second branch: ?resource is the country the tag is located in.
        sparqltext = """
            SELECT ?resource WHERE {
                { <%s> ?resource <http://dbpedia.org/ontology/Country> . }
                UNION
                { <%s> <http://dbpedia.org/ontology/country> ?resource . }
            }
        """
        resourceprefix = "http://dbpedia.org/resource/"
        identityuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

        writer = None

        for i, tag in enumerate(qs):
            # Reset per tag: otherwise a single binding that is neither a
            # DBpedia resource nor rdf:type would leave the label unbound
            # (NameError on the first tag) or stale from the previous tag.
            countrytxt = '<unknown>'
            country_uri = None

            endpoint.setQuery(sparqltext % (tag.dbpedia_uri, tag.dbpedia_uri))
            results = endpoint.query().convert()['results']['bindings']

            if len(results) == 1:  # We don't want places located in multiple countries
                resourceuri = results[0]['resource']['value']

                if resourceuri.startswith(resourceprefix):
                    # Literal prefix test (no regex wildcards); the label is
                    # the last path segment of the country URI.
                    local_name = resourceuri.rsplit('/', 1)[-1]
                    if local_name:
                        countrytxt = local_name
                        country_uri = resourceuri
                elif resourceuri == identityuri:
                    # The tag is itself typed as a country: it includes itself.
                    countrytxt = '<is a country>'
                    country_uri = tag.dbpedia_uri

            if country_uri is not None:
                country, _created = Country.objects.get_or_create(dbpedia_uri=country_uri)
                GeoInclusion.objects.get_or_create(tag=tag, country=country)

            writer = show_progress(i + 1, total, '%s => %s' % (tag.label, countrytxt), 50, writer)