--- a/web/hdalab/management/commands/query_dbpedia.py Thu Sep 06 12:16:27 2012 +0200
+++ b/web/hdalab/management/commands/query_dbpedia.py Sat Sep 08 03:49:10 2012 +0200
@@ -17,11 +17,11 @@
from hdabo.utils import show_progress
from rdflib.graph import Graph
from rdflib import URIRef
-import re
import traceback
import sys
from hdalab.models.dataviz import DbpediaFieldsTranslation
from django import db
+from SPARQLWrapper import SPARQLWrapper, RDF
class Command(NoArgsCommand):
'''
@@ -123,41 +123,54 @@
print "dbpedia query cancelled"
return
+ endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat=RDF)
+
writer = None
for i,tag in enumerate(queryset):
writer = show_progress(i+1, count, tag.label, 50, writer)
db.reset_queries()
- rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3"
- g = Graph()
+
+ #abstract query
+ #"select ?y
+ # where {<%s> <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri)
+
+ #rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3"
+ #g = Graph()
try :
- g.parse(rdf_uri, format="n3")
-
+ abstracts = {}
+ labels = {}
+ thumbnail = None
with transaction.commit_on_success():
+ endpoint.setQuery("select distinct ?y where {<%s> <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri))
+ res_abstracts = endpoint.queryAndConvert()
+ for _,_,o in res_abstracts.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
+ abstracts[o.language] = (unicode(o), True)
+
+ endpoint.setQuery("select distinct ?y where {<%s> <http://www.w3.org/2000/01/rdf-schema#label> ?y}" % (tag.dbpedia_uri))
+ res_labels = endpoint.queryAndConvert()
+ for _,_,o in res_labels.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
+ labels[o.language] = (unicode(o), True)
+
+ endpoint.setQuery("select distinct ?y where {<%s> <http://dbpedia.org/ontology/thumbnail> ?y} limit 1" % (tag.dbpedia_uri))
+ res_thumbnails = endpoint.queryAndConvert()
+ for _,_,o in res_thumbnails.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
+ thumbnail = unicode(o)
+
+ endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^http://dbpedia.org/resource")}' % (tag.dbpedia_uri))
+ res_links = endpoint.queryAndConvert()
+ for _,_,o in res_links.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
+ tagqs = Tag.objects.filter(dbpedia_uri=unicode(o))
+ if tagqs:
+ TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
- abstracts = {}
- labels = {}
- thumbnail = None
- for t in g:
- if t[1] == URIRef(u'http://dbpedia.org/ontology/abstract') and t[2] is not None \
- and hasattr(t[2], 'language'):
- abstracts[t[2].language] = unicode(t[2])
- if t[1] == URIRef(u'http://www.w3.org/2000/01/rdf-schema#label') and t[2] is not None \
- and hasattr(t[2], 'language'):
- labels[t[2].language] = unicode(t[2])
- if t[1] == URIRef(u'http://dbpedia.org/ontology/thumbnail') and t[2] is not None:
- thumbnail = unicode(t[2])
- if u'http://dbpedia.org/resource' in t[2]:
- tagqs = Tag.objects.filter(dbpedia_uri=unicode(t[2]))
- if tagqs:
- TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
-
- ref_label = labels['fr'] if 'fr' in labels else labels['en'] if 'en' in labels else labels.values()[0] if len(labels) > 0 else tag.label
- ref_abstract = abstracts['fr'] if 'fr' in abstracts else abstracts['en'] if 'en' in abstracts else abstracts.values()[0] if len(abstracts) > 0 else None
+ ref_label_lang, (ref_label, _) = ('fr',labels['fr']) if 'fr' in labels else ('en',labels['en']) if 'en' in labels else labels.items()[0] if len(labels) > 0 else ('fr',(tag.label, True))
+ ref_abstract_lang, (ref_abstract, _) = ('fr',abstracts['fr']) if 'fr' in abstracts else ('en',abstracts['en']) if 'en' in abstracts else abstracts.items()[0] if len(abstracts) > 0 else ('fr',(None, 'True'))
+
for lang in settings.LANGUAGES:
if lang[0] not in labels:
- labels[lang[0]]= ref_label
+ labels[lang[0]]= (ref_label, False)
if lang[0] not in abstracts:
- abstracts[lang[0]] = ref_abstract
+ abstracts[lang[0]] = (ref_abstract, False)
dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable
if not created:
@@ -169,16 +182,16 @@
consolidated_trans = {}
for lang,label in labels.iteritems():
- consolidated_trans[lang] = [label,ref_abstract]
+ consolidated_trans[lang] = [label,(ref_abstract, lang==ref_abstract_lang)]
for lang,abstract in abstracts.iteritems():
if lang in consolidated_trans:
consolidated_trans[lang][1] = abstract
else:
- consolidated_trans[lang] = [ref_label, abstract]
+ consolidated_trans[lang] = [(ref_label, lang==ref_label_lang), abstract]
for lang, trans in consolidated_trans.iteritems():
label, abstract = tuple(trans)
- DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label, abstract=abstract)
+ DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label[0], is_label_translated=label[1], abstract=abstract[0], is_abstract_translated=abstract[1])
except Exception as e: