--- a/src/hdalab/management/commands/query_dbpedia.py Fri Nov 14 17:06:02 2014 +0100
+++ b/src/hdalab/management/commands/query_dbpedia.py Sun Nov 16 18:09:42 2014 +0100
@@ -5,24 +5,26 @@
@author: ymh
'''
+from hdabo.models import Tag
+from hdabo.utils import show_progress
+from hdalab.models import DbpediaFields, TagLinks
+from hdalab.models.dataviz import DbpediaFieldsTranslation
+import logging
+from optparse import make_option
+import sys
+import traceback
+
+from SPARQLWrapper import SPARQLWrapper, N3
+from django import db
from django.conf import settings
from django.core.management.base import NoArgsCommand
from django.core.management.color import no_style
-
-from optparse import make_option
-from django.conf import settings
+from django.db import transaction
from django.db.models import Count
-from django.db import transaction
-from hdabo.models import Tag
-from hdalab.models import DbpediaFields, TagLinks
-from hdabo.utils import show_progress
-from rdflib.graph import Graph
-from rdflib import URIRef
-import traceback
-import sys
-from hdalab.models.dataviz import DbpediaFieldsTranslation
-from django import db
-from SPARQLWrapper import SPARQLWrapper, RDF
+from rdflib import URIRef, Graph
+
+
+logger = logging.getLogger(__name__)
class Command(NoArgsCommand):
'''
@@ -125,7 +127,7 @@
return
#endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat=RDF)
- endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=RDF)
+ endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=N3)
writer = None
for i,tag in enumerate(queryset):
@@ -144,27 +146,30 @@
thumbnail = None
with transaction.commit_on_success():
endpoint.setQuery("select distinct ?y where {<%s> <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri))
- res_abstracts = endpoint.queryAndConvert()
+ res_abstracts = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
for _,_,o in res_abstracts.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
abstracts[o.language] = (unicode(o), True)
-
+ logger.debug("Abstracts: %r" % abstracts)
+
endpoint.setQuery("select distinct ?y where {<%s> <http://www.w3.org/2000/01/rdf-schema#label> ?y}" % (tag.dbpedia_uri))
- res_labels = endpoint.queryAndConvert()
+ # the endpoint is configured with returnFormat=N3, so the response is parsed into an rdflib Graph before reading its triples
+ res_labels = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
for _,_,o in res_labels.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
labels[o.language] = (unicode(o), True)
-
+ logger.debug("Labels: %r" % labels)
+
endpoint.setQuery("select distinct ?y where {<%s> <http://dbpedia.org/ontology/thumbnail> ?y} limit 1" % (tag.dbpedia_uri))
- res_thumbnails = endpoint.queryAndConvert()
+ res_thumbnails = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
for _,_,o in res_thumbnails.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
thumbnail = unicode(o)
#endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^http://dbpedia.org/resource")}' % (tag.dbpedia_uri))
endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^%s")}' % (tag.dbpedia_uri, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', '' )))
- res_links = endpoint.queryAndConvert()
+ res_links = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
for _,_,o in res_links.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
tagqs = Tag.objects.filter(dbpedia_uri=unicode(o))
if tagqs:
- TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
+ TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
ref_label_lang, (ref_label, _) = ('fr',labels['fr']) if 'fr' in labels else ('en',labels['en']) if 'en' in labels else labels.items()[0] if len(labels) > 0 else ('fr',(tag.label, True))
ref_abstract_lang, (ref_abstract, _) = ('fr',abstracts['fr']) if 'fr' in abstracts else ('en',abstracts['en']) if 'en' in abstracts else abstracts.items()[0] if len(abstracts) > 0 else ('fr',(None, 'True'))