# HG changeset patch # User veltr # Date 1376663526 -7200 # Node ID fa40437d5991c88d213ec50ca160439acea53c42 # Parent 87443e64bece8c056f3b1b5e256e1ea15c4a10af Corrections in dbpedia label import diff -r 87443e64bece -r fa40437d5991 src/jocondelab/management/commands/import_dbpedia_translations.py --- a/src/jocondelab/management/commands/import_dbpedia_translations.py Fri Aug 16 15:28:40 2013 +0200 +++ b/src/jocondelab/management/commands/import_dbpedia_translations.py Fri Aug 16 16:32:06 2013 +0200 @@ -14,6 +14,9 @@ from SPARQLWrapper import SPARQLWrapper2 from optparse import make_option import traceback +import sys +import urllib +import json class Command(NoArgsCommand): @@ -30,7 +33,6 @@ def handle_noargs(self, **options): - langstr = ','.join(['"%s"'%l[0] for l in settings.LANGUAGES]) endpointre = re.compile('^http:\/\/\w+\.?dbpedia.org') qs = DbpediaResource.objects @@ -46,29 +48,36 @@ for i,obj in enumerate(qs): writer = show_progress(i+1, count, obj.uri, 50, writer) - - try: - with transaction.commit_on_success(): - DbpediaTranslation.objects.filter(dbpediaresource=obj).delete() - endpointuri = settings.WIKIPEDIA_URLS[obj.lang]['dbpedia_sparql_url'] - endpoint = SPARQLWrapper2(endpointuri) - sparql = """ - select lang(?label) as ?lang, ?label where { - <%s> rdfs:label ?label . - FILTER (lang(?label) IN (%s)) - } - """%(obj.uri, langstr) - endpoint.setQuery(sparql) - results = endpoint.query() - - for binding in results.bindings: - DbpediaTranslation.objects.create( - dbpediaresource = obj, - lang = binding[u"lang"].value, - label = binding[u"label"].value - ) - except Exception as e: - print "\nError processing resource %s : %s" %(obj.uri,unicode(e)) - traceback.print_exception(type(e), e, sys.exc_info()[2]) + for langtuple in settings.LANGUAGES: + lang = langtuple[0] + try: + with transaction.commit_on_success(): + DbpediaTranslation.objects.filter(dbpediaresource=obj).delete() + endpointuri = settings.WIKIPEDIA_URLS[obj.lang]['dbpedia_sparql_url'] + endpoint = SPARQLWrapper2(endpointuri) + uri = urllib.unquote(str(obj.uri)).decode('utf8') + sparql = u""" + select ?label, ?rlabel where { + OPTIONAL { <%s> rdfs:label ?label FILTER (lang(?label) = "%s") } . + OPTIONAL { <%s> dbpedia-owl:wikiPageRedirects ?r }. + OPTIONAL { ?r rdfs:label ?rlabel FILTER (lang(?rlabel) = "%s") }. + } + """%(uri, lang, uri, lang) + endpoint.setQuery(sparql) + results = endpoint.query() + + if len(results.bindings): + binding = results.bindings[0] + label = binding[u"label"].value if "label" in binding else binding[u"rlabel"].value if "rlabel" in binding else None + if label is not None: + DbpediaTranslation.objects.create( + dbpediaresource = obj, + lang = lang, + label = label + ) + + except Exception as e: + print "\nError processing resource %s : %s" %(obj.uri,unicode(e)) + traceback.print_exception(type(e), e, sys.exc_info()[2]) \ No newline at end of file