src/hdalab/management/commands/query_dbpedia.py
changeset 359 46ad324f6fe4
parent 284 f01235a1dcc2
child 360 6129e7390abe
--- a/src/hdalab/management/commands/query_dbpedia.py	Fri Nov 14 17:06:02 2014 +0100
+++ b/src/hdalab/management/commands/query_dbpedia.py	Sun Nov 16 18:09:42 2014 +0100
@@ -5,24 +5,26 @@
 @author: ymh
 '''
 
+from hdabo.models import Tag
+from hdabo.utils import show_progress
+from hdalab.models import DbpediaFields, TagLinks
+from hdalab.models.dataviz import DbpediaFieldsTranslation
+import logging
+from optparse import make_option
+import sys
+import traceback
+
+from SPARQLWrapper import SPARQLWrapper, N3
+from django import db
 from django.conf import settings
 from django.core.management.base import NoArgsCommand
 from django.core.management.color import no_style
-
-from optparse import make_option
-from django.conf import settings
+from django.db import transaction
 from django.db.models import Count
-from django.db import transaction
-from hdabo.models import Tag
-from hdalab.models import DbpediaFields, TagLinks
-from hdabo.utils import show_progress
-from rdflib.graph import Graph
-from rdflib import URIRef
-import traceback
-import sys
-from hdalab.models.dataviz import DbpediaFieldsTranslation
-from django import db
-from SPARQLWrapper import SPARQLWrapper, RDF
+from rdflib import URIRef, Graph
+
+
+logger = logging.getLogger(__name__)
 
 class Command(NoArgsCommand):
     '''
@@ -125,7 +127,7 @@
             return
 
         #endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat=RDF)
-        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=RDF)
+        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=N3)
 
         writer = None
         for i,tag in enumerate(queryset):
@@ -144,27 +146,30 @@
                 thumbnail = None
                 with transaction.commit_on_success():
                     endpoint.setQuery("select distinct ?y where {<%s>  <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri))
-                    res_abstracts = endpoint.queryAndConvert()
+                    res_abstracts = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
                     for _,_,o in res_abstracts.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         abstracts[o.language] = (unicode(o), True)
-
+                    logger.debug("Abstracts: %r" % abstracts)
+                    
                     endpoint.setQuery("select distinct ?y where {<%s>  <http://www.w3.org/2000/01/rdf-schema#label> ?y}" % (tag.dbpedia_uri))
-                    res_labels = endpoint.queryAndConvert()
+                    #res_labels = endpoint.queryAndConvert()
+                    res_labels = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
                     for _,_,o in res_labels.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         labels[o.language] = (unicode(o), True)
-                            
+                    logger.debug("Labels: %r" % labels)
+                    
                     endpoint.setQuery("select distinct ?y where {<%s>  <http://dbpedia.org/ontology/thumbnail> ?y} limit 1" % (tag.dbpedia_uri))
-                    res_thumbnails = endpoint.queryAndConvert()
+                    res_thumbnails = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
                     for _,_,o in res_thumbnails.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         thumbnail = unicode(o)
 
                     #endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^http://dbpedia.org/resource")}' % (tag.dbpedia_uri))
                     endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^%s")}' % (tag.dbpedia_uri, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', '' )))
-                    res_links = endpoint.queryAndConvert()
+                    res_links = Graph().parse(data=endpoint.queryAndConvert(), format='n3')
                     for _,_,o in res_links.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         tagqs = Tag.objects.filter(dbpedia_uri=unicode(o))
                         if tagqs:
-                            TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])                        
+                            TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
                     
                     ref_label_lang, (ref_label, _) = ('fr',labels['fr']) if 'fr' in labels else ('en',labels['en']) if 'en' in labels else labels.items()[0] if len(labels) > 0 else ('fr',(tag.label, True))
                     ref_abstract_lang, (ref_abstract, _) = ('fr',abstracts['fr']) if 'fr' in abstracts else ('en',abstracts['en']) if 'en' in abstracts else abstracts.items()[0] if len(abstracts) > 0 else ('fr',(None, 'True'))