--- a/src/jocondelab/management/commands/import_dbpedia_translations.py Fri Aug 16 15:28:40 2013 +0200
+++ b/src/jocondelab/management/commands/import_dbpedia_translations.py Fri Aug 16 16:32:06 2013 +0200
@@ -14,6 +14,9 @@
from SPARQLWrapper import SPARQLWrapper2
from optparse import make_option
import traceback
+import sys
+import urllib
+import json
class Command(NoArgsCommand):
@@ -30,7 +33,6 @@
def handle_noargs(self, **options):
- langstr = ','.join(['"%s"'%l[0] for l in settings.LANGUAGES])
endpointre = re.compile('^http:\/\/\w+\.?dbpedia.org')
qs = DbpediaResource.objects
@@ -46,29 +48,36 @@
for i,obj in enumerate(qs):
writer = show_progress(i+1, count, obj.uri, 50, writer)
-
- try:
- with transaction.commit_on_success():
- DbpediaTranslation.objects.filter(dbpediaresource=obj).delete()
- endpointuri = settings.WIKIPEDIA_URLS[obj.lang]['dbpedia_sparql_url']
- endpoint = SPARQLWrapper2(endpointuri)
- sparql = """
- select lang(?label) as ?lang, ?label where {
- <%s> rdfs:label ?label .
- FILTER (lang(?label) IN (%s))
- }
- """%(obj.uri, langstr)
- endpoint.setQuery(sparql)
- results = endpoint.query()
-
- for binding in results.bindings:
- DbpediaTranslation.objects.create(
- dbpediaresource = obj,
- lang = binding[u"lang"].value,
- label = binding[u"label"].value
- )
- except Exception as e:
- print "\nError processing resource %s : %s" %(obj.uri,unicode(e))
- traceback.print_exception(type(e), e, sys.exc_info()[2])
+ for langtuple in settings.LANGUAGES:
+ lang = langtuple[0]
+ try:
+ with transaction.commit_on_success():
+ DbpediaTranslation.objects.filter(dbpediaresource=obj).delete()
+ endpointuri = settings.WIKIPEDIA_URLS[obj.lang]['dbpedia_sparql_url']
+ endpoint = SPARQLWrapper2(endpointuri)
+ uri = urllib.unquote(str(obj.uri)).decode('utf8')
+ sparql = u"""
+ select ?label, ?rlabel where {
+ OPTIONAL { <%s> rdfs:label ?label FILTER (lang(?label) = "%s") } .
+ OPTIONAL { <%s> dbpedia-owl:wikiPageRedirects ?r }.
+ OPTIONAL { ?r rdfs:label ?rlabel FILTER (lang(?rlabel) = "%s") }.
+ }
+ """%(uri, lang, uri, lang)
+ endpoint.setQuery(sparql)
+ results = endpoint.query()
+
+ if len(results.bindings):
+ binding = results.bindings[0]
+ label = binding[u"label"].value if "label" in binding else binding[u"rlabel"].value if "rlabel" in binding else None
+ if label is not None:
+ DbpediaTranslation.objects.create(
+ dbpediaresource = obj,
+ lang = lang,
+ label = label
+ )
+
+ except Exception as e:
+ print "\nError processing resource %s : %s" %(obj.uri,unicode(e))
+ traceback.print_exception(type(e), e, sys.exc_info()[2])
\ No newline at end of file