src/jocondelab/management/commands/import_dbpedia_geo_years.py
changeset 96 eca960eac58b
child 106 219e27e5e26d
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/src/jocondelab/management/commands/import_dbpedia_geo_years.py	Tue Aug 20 18:54:03 2013 +0200
@@ -0,0 +1,113 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Aug 08, 2013
+
+@author: rvelt
+'''
+
+from core.utils import show_progress
+from core.wp_utils import get_dbpedia_lang
+from django.conf import settings
+from django.core.management.base import NoArgsCommand
+from core.models import Term
+from jocondelab.models import (DbpediaYears, DbpediaGeo)
+from django.db import transaction
+import re
+from SPARQLWrapper import SPARQLWrapper2
+from optparse import make_option
+import traceback
+import sys
+import math
+from django.utils.http import urlunquote
+
+class Command(NoArgsCommand):
+    
+    help = "Import tag metadata from dbpedia"
+    
+    
+    def handle_noargs(self, **options):
+        
+        endpoints = {}
+        qs = Term.objects.exclude(dbpedia_uri=None).order_by('-nb_notice')
+        count = qs.count()
+        writer = None
+        
+        yearre = re.compile("^-?\d+")
+        sylbls = [  "birthdate", "startyear" ]
+        eylbls = [ "deathdate", "endyear" ]
+        
+        for i,obj in enumerate(qs):
+            writer = show_progress(i+1, count, obj.dbpedia_uri, 50, writer)
+            dbp_lang = get_dbpedia_lang(obj.dbpedia_uri)
+            if dbp_lang is None:
+                print("Lang unknown for %s, continue" % obj.dbpedia_uri)
+                continue
+            endpoint = endpoints.get(dbp_lang, None)
+            if endpoint is None:
+                dbpedia_sparql_url = settings.WIKIPEDIA_URLS.get(dbp_lang,{}).get('dbpedia_sparql_url', None)
+                if dbpedia_sparql_url is None:
+                    print("Lang unknown for %s, continue" % obj.dbpedia_uri)
+                    continue
+                endpoint = endpoints.setdefault(dbp_lang, SPARQLWrapper2(dbpedia_sparql_url))
+            try:
+                with transaction.commit_on_success():
+                    uri = urlunquote(obj.dbpedia_uri)
+                    sparql = u"""
+    select distinct * where {
+        OPTIONAL {
+          <%s> dbpedia-owl:activeYearsStartYear ?startyear .
+        }
+        OPTIONAL {
+          <%s> dbpedia-owl:activeYearsEndYear ?endyear .
+        }
+        OPTIONAL {
+          <%s> dbpedia-owl:birthDate ?birthdate .
+        }
+        OPTIONAL {
+          <%s> dbpedia-owl:deathDate ?deathdate .
+        }
+        OPTIONAL {
+          <%s> geo:lat ?latitude .
+          <%s> geo:long ?longitude .
+        }
+    }
+                    """%(6*(uri,))
+                    endpoint.setQuery(sparql)
+                    results = endpoint.query()
+                    
+                    if len(results.bindings):
+                        binding = results.bindings[0]
+                        syv = None
+                        eyv = None
+                        for lbl in sylbls:
+                            if lbl in binding:
+                                syv = yearre.findall(binding[lbl].value)
+                                break
+                        for lbl in eylbls:
+                            if lbl in binding:
+                                eyv = yearre.findall(binding[lbl].value)
+                                break
+                        if syv and eyv:
+                            sy = syv[0]
+                            ey = eyv[0]
+                            dbyr, created = DbpediaYears.objects.get_or_create(term = obj, defaults={'start_year': sy, 'end_year': ey})
+                            if not created:
+                                dbyr.start_year = sy
+                                dbyr.end_year = ey
+                                dbyr.save()
+                        
+                        lat = float(binding["latitude"].value) if "latitude" in binding else None
+                        lng = float(binding["longitude"].value) if "longitude" in binding else None
+                        
+                        if (lat is not None) and (not math.isnan(lat)) and (lng is not None) and (not math.isnan(lng)):
+                            dbgeo, created = DbpediaGeo.objects.get_or_create(term = obj, defaults={'latitude': lat, 'longitude': lng})
+                            if not created:
+                                dbgeo.latitude = lat
+                                dbgeo.longitude = lng
+                                dbgeo.save()
+                        
+            except Exception as e:
+                print "\nError processing resource %s : %s" %(obj.dbpedia_uri,unicode(e))
+                traceback.print_exception(type(e), e, sys.exc_info()[2])
+                
+        
\ No newline at end of file