src/hdalab/management/commands/query_geo_inclusion.py
author ymh <ymh.work@gmail.com>
Fri, 19 Jul 2024 09:38:03 +0200
changeset 704 b5835dca2624
parent 359 46ad324f6fe4
child 693 09e00f38d177
permissions -rw-r--r--
Adapt renkan preview to use chrome headless/puppeteer

# -*- coding: utf-8 -*-
'''
Created on Feb 22, 2012

@author: raphv
'''
from django.conf import settings
from django.core.management.base import NoArgsCommand
from django.core.management.color import no_style
from hdabo.utils import show_progress
from hdabo.models import Tag
from hdalab.models import Country, GeoInclusion
from SPARQLWrapper import SPARQLWrapper, JSON
import re

class Command(NoArgsCommand):
    '''
    Rebuild the Country and GeoInclusion tables from DBpedia.

    Every "Localisation" tag that has a DBpedia URI is looked up on the
    SPARQL endpoint derived from settings.DBPEDIA_URI_TEMPLATE, and is
    linked to a country when exactly one answer comes back.
    '''

    def handle_noargs(self, **options):
        '''
        Delete all GeoInclusion and Country rows, then recreate them by
        querying the SPARQL endpoint once per localisation tag.

        A progress line "<tag label> => <country>" is displayed for each
        tag through show_progress.
        '''
        self.style = no_style()

        # The tables are rebuilt from scratch on every run.
        GeoInclusion.objects.all().delete()
        Country.objects.all().delete()

        qs = Tag.objects.filter(category__label="Localisation").exclude(dbpedia_uri = None)
        total = qs.count()

        # The endpoint and the resource prefix both come from the configured
        # template (for the main DBpedia these would be
        # http://dbpedia.org/sparql and http://dbpedia.org/resource/).
        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ))
        endpoint.setReturnFormat(JSON)
        # First pattern: ?resource is the predicate linking the tag to the
        # dbo:Country class (rdf:type when the tag itself is a country).
        # Second pattern: ?resource is the value of the tag's dbo:country.
        sparqltext = """
            SELECT ?resource WHERE {
             { <%s> ?resource <http://dbpedia.org/ontology/Country> . }
            UNION
             { <%s> <http://dbpedia.org/ontology/country> ?resource . }
            }
        """
        resourceprefix = settings.DBPEDIA_URI_TEMPLATE % ( 'resource', '' )
        identityuri = "http://www.w3.org/1999/02/22-rdf-syntax-ns#type"

        writer = None

        for i, tag in enumerate(qs):
            endpoint.setQuery(sparqltext % (tag.dbpedia_uri, tag.dbpedia_uri))

            results = endpoint.query().convert()['results']['bindings']

            # Always start from a known value so that the progress line never
            # shows a stale country from a previous tag (or fails with a
            # NameError on the very first tag).
            countrytxt = '<unknown>'
            countryuri = None

            if len(results) == 1: # We don't want places located in multiple countries
                resourceuri = results[0]['resource']['value']

                # Literal prefix comparison: the prefix is a URL, not a
                # regular expression.
                if resourceuri.startswith(resourceprefix):
                    # Last path segment of the country resource URI.
                    countrytxt = resourceuri.rsplit('/', 1)[-1]
                    countryuri = resourceuri
                elif resourceuri == identityuri:
                    # The tag is itself typed as a country.
                    countrytxt = '<is a country>'
                    countryuri = tag.dbpedia_uri

            if countryuri is not None:
                country, _ = Country.objects.get_or_create(dbpedia_uri=countryuri)
                GeoInclusion.objects.get_or_create(tag=tag, country=country)

            writer = show_progress(i+1, total, '%s => %s'%(tag.label, countrytxt), 50, writer)