diff -r d162cb0c4386 -r 46ad324f6fe4 src/hdalab/management/commands/query_dbpedia.py --- a/src/hdalab/management/commands/query_dbpedia.py Fri Nov 14 17:06:02 2014 +0100 +++ b/src/hdalab/management/commands/query_dbpedia.py Sun Nov 16 18:09:42 2014 +0100 @@ -5,24 +5,26 @@ @author: ymh ''' +from hdabo.models import Tag +from hdabo.utils import show_progress +from hdalab.models import DbpediaFields, TagLinks +from hdalab.models.dataviz import DbpediaFieldsTranslation +import logging +from optparse import make_option +import sys +import traceback + +from SPARQLWrapper import SPARQLWrapper, N3 +from django import db from django.conf import settings from django.core.management.base import NoArgsCommand from django.core.management.color import no_style - -from optparse import make_option -from django.conf import settings +from django.db import transaction from django.db.models import Count -from django.db import transaction -from hdabo.models import Tag -from hdalab.models import DbpediaFields, TagLinks -from hdabo.utils import show_progress -from rdflib.graph import Graph -from rdflib import URIRef -import traceback -import sys -from hdalab.models.dataviz import DbpediaFieldsTranslation -from django import db -from SPARQLWrapper import SPARQLWrapper, RDF +from rdflib import URIRef, Graph + + +logger = logging.getLogger(__name__) class Command(NoArgsCommand): ''' @@ -125,7 +127,7 @@ return #endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat=RDF) - endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=RDF) + endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=N3) writer = None for i,tag in enumerate(queryset): @@ -144,27 +146,30 @@ thumbnail = None with transaction.commit_on_success(): endpoint.setQuery("select distinct ?y where {<%s> ?y}" % (tag.dbpedia_uri)) - res_abstracts = endpoint.queryAndConvert() + res_abstracts = Graph().parse(data=endpoint.queryAndConvert(), format='n3') for _,_,o in res_abstracts.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)): abstracts[o.language] = (unicode(o), True) - + logger.debug("Abstracts: %r" % abstracts) + endpoint.setQuery("select distinct ?y where {<%s> ?y}" % (tag.dbpedia_uri)) - res_labels = endpoint.queryAndConvert() + #res_labels = endpoint.queryAndConvert() + res_labels = Graph().parse(data=endpoint.queryAndConvert(), format='n3') for _,_,o in res_labels.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)): labels[o.language] = (unicode(o), True) - + logger.debug("Labels: %r" % labels) + endpoint.setQuery("select distinct ?y where {<%s> ?y} limit 1" % (tag.dbpedia_uri)) - res_thumbnails = endpoint.queryAndConvert() + res_thumbnails = Graph().parse(data=endpoint.queryAndConvert(), format='n3') for _,_,o in res_thumbnails.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)): thumbnail = unicode(o) #endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^http://dbpedia.org/resource")}' % (tag.dbpedia_uri)) endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^%s")}' % (tag.dbpedia_uri, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', '' ))) - res_links = endpoint.queryAndConvert() + res_links = Graph().parse(data=endpoint.queryAndConvert(), format='n3') for _,_,o in res_links.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)): tagqs = Tag.objects.filter(dbpedia_uri=unicode(o)) if tagqs: - TagLinks.objects.get_or_create(subject=tag, object=tagqs[0]) + TagLinks.objects.get_or_create(subject=tag, object=tagqs[0]) ref_label_lang, (ref_label, _) = ('fr',labels['fr']) if 'fr' in labels else ('en',labels['en']) if 'en' in labels else labels.items()[0] if len(labels) > 0 else ('fr',(tag.label, True)) ref_abstract_lang, (ref_abstract, _) = ('fr',abstracts['fr']) if 'fr' in abstracts else ('en',abstracts['en']) if 'en' in abstracts else abstracts.items()[0] if len(abstracts) > 0 else ('fr',(None, 'True'))