--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jocondelab/management/commands/import_dbpedia_geo_years.py Tue Aug 20 18:54:03 2013 +0200
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 08, 2013
+
+@author: rvelt
+'''
+
+from core.utils import show_progress
+from core.wp_utils import get_dbpedia_lang
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from core.models import Term
+from jocondelab.models import (DbpediaYears, DbpediaGeo)
+from django.db import transaction
+import re
+from SPARQLWrapper import SPARQLWrapper2
+from optparse import make_option
+import traceback
+import sys
+import math
+from django.utils.http import urlunquote
+
+class Command(NoArgsCommand):
+
+ help = "Import tag metadata from dbpedia"
+
+
+ def handle_noargs(self, **options):
+
+ endpoints = {}
+ qs = Term.objects.exclude(dbpedia_uri=None).order_by('-nb_notice')
+ count = qs.count()
+ writer = None
+
+ yearre = re.compile("^-?\d+")
+ sylbls = [ "birthdate", "startyear" ]
+ eylbls = [ "deathdate", "endyear" ]
+
+ for i,obj in enumerate(qs):
+ writer = show_progress(i+1, count, obj.dbpedia_uri, 50, writer)
+ dbp_lang = get_dbpedia_lang(obj.dbpedia_uri)
+ if dbp_lang is None:
+ print("Lang unknown for %s, continue" % obj.dbpedia_uri)
+ continue
+ endpoint = endpoints.get(dbp_lang, None)
+ if endpoint is None:
+ dbpedia_sparql_url = settings.WIKIPEDIA_URLS.get(dbp_lang,{}).get('dbpedia_sparql_url', None)
+ if dbpedia_sparql_url is None:
+ print("Lang unknown for %s, continue" % obj.dbpedia_uri)
+ continue
+ endpoint = endpoints.setdefault(dbp_lang, SPARQLWrapper2(dbpedia_sparql_url))
+ try:
+ with transaction.commit_on_success():
+ uri = urlunquote(obj.dbpedia_uri)
+ sparql = u"""
+ select distinct * where {
+ OPTIONAL {
+ <%s> dbpedia-owl:activeYearsStartYear ?startyear .
+ }
+ OPTIONAL {
+ <%s> dbpedia-owl:activeYearsEndYear ?endyear .
+ }
+ OPTIONAL {
+ <%s> dbpedia-owl:birthDate ?birthdate .
+ }
+ OPTIONAL {
+ <%s> dbpedia-owl:deathDate ?deathdate .
+ }
+ OPTIONAL {
+ <%s> geo:lat ?latitude .
+ <%s> geo:long ?longitude .
+ }
+ }
+ """%(6*(uri,))
+ endpoint.setQuery(sparql)
+ results = endpoint.query()
+
+ if len(results.bindings):
+ binding = results.bindings[0]
+ syv = None
+ eyv = None
+ for lbl in sylbls:
+ if lbl in binding:
+ syv = yearre.findall(binding[lbl].value)
+ break
+ for lbl in eylbls:
+ if lbl in binding:
+ eyv = yearre.findall(binding[lbl].value)
+ break
+ if syv and eyv:
+ sy = syv[0]
+ ey = eyv[0]
+ dbyr, created = DbpediaYears.objects.get_or_create(term = obj, defaults={'start_year': sy, 'end_year': ey})
+ if not created:
+ dbyr.start_year = sy
+ dbyr.end_year = ey
+ dbyr.save()
+
+ lat = float(binding["latitude"].value) if "latitude" in binding else None
+ lng = float(binding["longitude"].value) if "longitude" in binding else None
+
+ if (lat is not None) and (not math.isnan(lat)) and (lng is not None) and (not math.isnan(lng)):
+ dbgeo, created = DbpediaGeo.objects.get_or_create(term = obj, defaults={'latitude': lat, 'longitude': lng})
+ if not created:
+ dbgeo.latitude = lat
+ dbgeo.longitude = lng
+ dbgeo.save()
+
+ except Exception as e:
+ print "\nError processing resource %s : %s" %(obj.dbpedia_uri,unicode(e))
+ traceback.print_exception(type(e), e, sys.exc_info()[2])
+
+
\ No newline at end of file