src/hdalab/management/commands/query_dbpedia.py
changeset 361 a1b85604132c
parent 360 6129e7390abe
child 571 d9642be7c937
--- a/src/hdalab/management/commands/query_dbpedia.py	Sun Nov 16 19:35:12 2014 +0100
+++ b/src/hdalab/management/commands/query_dbpedia.py	Mon Nov 17 16:48:36 2014 +0100
@@ -14,7 +14,6 @@
 import sys
 import traceback
 
-from SPARQLWrapper import SPARQLWrapper, N3
 from django import db
 from django.conf import settings
 from django.core.management.base import NoArgsCommand
@@ -22,6 +21,7 @@
 from django.db import transaction
 from django.db.models import Count
 from rdflib import URIRef, Graph
+import requests
 
 
 logger = logging.getLogger(__name__)
@@ -69,12 +69,17 @@
             help='the tag to query'),
     )
     
-    def query_dbpedia(self, query, fmt=N3):
-        endpoint = SPARQLWrapper(settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' ), returnFormat=fmt)
-        endpoint.setQuery(query)
-        return Graph().parse(data=endpoint.queryAndConvert(), format=fmt)
-        
-    
+    def query_dbpedia(self, query, fmt='n3'):
+        url = settings.DBPEDIA_URI_TEMPLATE % ( 'sparql', '' )
+        params = {
+            'query': query,
+            'format': {'n3':'text/turtle', 'rdf/xml':"application/rdf+xml", 'nt': 'text/plain'}.get(fmt, 'text/turtle')
+        }
+        resp = requests.get(url, params=params)
+        logger.debug("Query dbpedia : %s", resp.text)
+        return Graph().parse(data=resp.text, format=fmt)
+
+
     def handle_noargs(self, **options):
                 
         self.style = no_style()
@@ -148,21 +153,21 @@
                 labels = {}
                 thumbnail = None
                 with transaction.commit_on_success():
-                    res_abstracts = self.query_dbpedia("select distinct ?y where {<%s>  <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri), N3)
+                    res_abstracts = self.query_dbpedia("select distinct ?y where {<%s>  <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri), 'n3')
                     for _,_,o in res_abstracts.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         abstracts[o.language] = (unicode(o), True)
                     logger.debug("Abstracts: %r" % abstracts)
                     
-                    res_labels = self.query_dbpedia("select distinct ?y where {<%s>  <http://www.w3.org/2000/01/rdf-schema#label> ?y}" % (tag.dbpedia_uri), N3)
+                    res_labels = self.query_dbpedia("select distinct ?y where {<%s>  <http://www.w3.org/2000/01/rdf-schema#label> ?y}" % (tag.dbpedia_uri), 'n3')
                     for _,_,o in res_labels.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         labels[o.language] = (unicode(o), True)
                     logger.debug("Labels: %r" % labels)
                     
-                    res_thumbnails = self.query_dbpedia("select distinct ?y where {<%s>  <http://dbpedia.org/ontology/thumbnail> ?y} limit 1" % (tag.dbpedia_uri), N3)
+                    res_thumbnails = self.query_dbpedia("select distinct ?y where {<%s>  <http://dbpedia.org/ontology/thumbnail> ?y} limit 1" % (tag.dbpedia_uri), 'n3')
                     for _,_,o in res_thumbnails.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         thumbnail = unicode(o)
 
-                    res_links = self.query_dbpedia('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^%s")}' % (tag.dbpedia_uri, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', '' )), N3)
+                    res_links = self.query_dbpedia('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^%s")}' % (tag.dbpedia_uri, settings.DBPEDIA_URI_TEMPLATE % ( 'resource', '' )), 'n3')
                     for _,_,o in res_links.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
                         tagqs = Tag.objects.filter(dbpedia_uri=unicode(o))
                         if tagqs:
@@ -177,8 +182,9 @@
                         if lang[0] not in abstracts:
                             abstracts[lang[0]] = (ref_abstract, False)
 
-                    dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable                    
+                    dbfield , created = DbpediaFields.objects.get_or_create(tag=tag, defaults={'dbpedia_uri':tag.dbpedia_uri, 'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable                    
                     if not created:
+                        dbfield.dbpedia_uri = tag.dbpedia_uri
                         dbfield.abstract = ref_abstract
                         dbfield.thumbnail = thumbnail
                         dbfield.label = ref_label