--- a/web/hdalab/management/commands/query_dbpedia.py Fri Nov 16 18:12:05 2012 +0100
+++ /dev/null Thu Jan 01 00:00:00 1970 +0000
@@ -1,205 +0,0 @@
-# -*- coding: utf-8 -*-
-'''
-Created on Jan 30, 2012
-
-@author: ymh
-'''
-
-from django.core.management.base import NoArgsCommand
-from django.core.management.color import no_style
-
-from optparse import make_option
-from django.conf import settings
-from django.db.models import Count
-from django.db import transaction
-from hdabo.models import Tag
-from hdalab.models import DbpediaFields, TagLinks
-from hdabo.utils import show_progress
-from rdflib.graph import Graph
-from rdflib import URIRef
-import traceback
-import sys
-from hdalab.models.dataviz import DbpediaFieldsTranslation
-from django import db
-from SPARQLWrapper import SPARQLWrapper, RDF
-
-class Command(NoArgsCommand):
- '''
- query and update wikipedia for tag title.
- '''
- options = ''
- help = """query and update wikipedia for tag title."""
-
- option_list = NoArgsCommand.option_list + (
- make_option('--all',
- action='store_true',
- dest='all',
- default=False,
- help='force all tags to be updated, not only those not yet processed'),
- make_option('--force',
- action='store_true',
- dest='force',
- default=False,
- help='ask no questions'),
- make_option('--random',
- action='store_true',
- dest='random',
- default=False,
- help='randomize query on tags'),
- make_option('--limit',
- action='store',
- type='int',
- dest='limit',
- default= -1,
- help='number of tag to process'),
- make_option('--start',
- action='store',
- type='int',
- dest='start',
- default=0,
- help='number of tag to ignore'),
- make_option('--tag',
- action='append',
- dest='tags',
- type='string',
- default=[],
- help='the tag to query'),
- )
-
- def handle_noargs(self, **options):
-
- self.style = no_style()
-
- self.interactive = options.get('interactive', True)
-
- self.verbosity = int(options.get('verbosity', '1'))
-
- self.force = options.get('force', False)
-
- self.limit = options.get("limit", -1)
- self.start = options.get("start", 0)
-
- self.random = options.get('random', False)
-
- if self.verbosity > 2:
- print "option passed : " + repr(options)
-
- self.tag_list = options.get("tags", []);
-
- queryset = Tag.objects.exclude(dbpedia_uri= None)
-
-
- if self.tag_list:
- queryset = queryset.filter(label__in=self.tag_list)
- elif not options.get('all',False):
- queryset = queryset.annotate(dbfc=Count('dbpedia_fields')).filter(dbfc = 0)
-
- if self.random:
- queryset = queryset.order_by("?")
- else:
- queryset = queryset.order_by("label")
-
- if self.limit >= 0:
- queryset = queryset[self.start:self.limit]
- elif self.start > 0:
- queryset = queryset[self.start:]
-
- if self.verbosity > 2 :
- print "Tag Query is %s" % (queryset.query)
-
- count = queryset.count()
-
- if count == 0:
- print "No tag to query : exit."
- return
-
-
- if not self.force and self.interactive:
- confirm = raw_input("You have requested to query and replace the dbpedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
- else:
- confirm = 'yes'
-
- if confirm != "yes":
- print "dbpedia query cancelled"
- return
-
- endpoint = SPARQLWrapper("http://dbpedia.org/sparql", returnFormat=RDF)
-
- writer = None
- for i,tag in enumerate(queryset):
- writer = show_progress(i+1, count, tag.label, 50, writer)
- db.reset_queries()
-
- #abstract query
- #"select ?y
- # where {<%s> <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri)
-
- #rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3"
- #g = Graph()
- try :
- abstracts = {}
- labels = {}
- thumbnail = None
- with transaction.commit_on_success():
- endpoint.setQuery("select distinct ?y where {<%s> <http://dbpedia.org/ontology/abstract> ?y}" % (tag.dbpedia_uri))
- res_abstracts = endpoint.queryAndConvert()
- for _,_,o in res_abstracts.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
- abstracts[o.language] = (unicode(o), True)
-
- endpoint.setQuery("select distinct ?y where {<%s> <http://www.w3.org/2000/01/rdf-schema#label> ?y}" % (tag.dbpedia_uri))
- res_labels = endpoint.queryAndConvert()
- for _,_,o in res_labels.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
- labels[o.language] = (unicode(o), True)
-
- endpoint.setQuery("select distinct ?y where {<%s> <http://dbpedia.org/ontology/thumbnail> ?y} limit 1" % (tag.dbpedia_uri))
- res_thumbnails = endpoint.queryAndConvert()
- for _,_,o in res_thumbnails.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
- thumbnail = unicode(o)
-
- endpoint.setQuery('select distinct ?y where { <%s> ?p ?y . FILTER regex(?y, "^http://dbpedia.org/resource")}' % (tag.dbpedia_uri))
- res_links = endpoint.queryAndConvert()
- for _,_,o in res_links.triples((None, URIRef('http://www.w3.org/2005/sparql-results#value'), None)):
- tagqs = Tag.objects.filter(dbpedia_uri=unicode(o))
- if tagqs:
- TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
-
- ref_label_lang, (ref_label, _) = ('fr',labels['fr']) if 'fr' in labels else ('en',labels['en']) if 'en' in labels else labels.items()[0] if len(labels) > 0 else ('fr',(tag.label, True))
- ref_abstract_lang, (ref_abstract, _) = ('fr',abstracts['fr']) if 'fr' in abstracts else ('en',abstracts['en']) if 'en' in abstracts else abstracts.items()[0] if len(abstracts) > 0 else ('fr',(None, 'True'))
-
- for lang in settings.LANGUAGES:
- if lang[0] not in labels:
- labels[lang[0]]= (ref_label, False)
- if lang[0] not in abstracts:
- abstracts[lang[0]] = (ref_abstract, False)
-
- dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable
- if not created:
- dbfield.abstract = ref_abstract
- dbfield.thumbnail = thumbnail
- dbfield.label = ref_label
- dbfield.save()
- DbpediaFieldsTranslation.objects.filter(master=dbfield).delete()
-
- consolidated_trans = {}
- for lang,label in labels.iteritems():
- consolidated_trans[lang] = [label,(ref_abstract, lang==ref_abstract_lang)]
- for lang,abstract in abstracts.iteritems():
- if lang in consolidated_trans:
- consolidated_trans[lang][1] = abstract
- else:
- consolidated_trans[lang] = [(ref_label, lang==ref_label_lang), abstract]
-
- for lang, trans in consolidated_trans.iteritems():
- label, abstract = tuple(trans)
- DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label[0], is_label_translated=label[1], abstract=abstract[0], is_abstract_translated=abstract[1])
-
-
- except Exception as e:
- print "\nError processing resource %s : %s" %(rdf_uri,unicode(e))
- traceback.print_exception(type(e), e, sys.exc_info()[2])
-
-
-
-
-
-