--- /dev/null Thu Jan 01 00:00:00 1970 +0000
+++ b/src/hdalab/management/commands/query_dbpedia.py Mon Jun 16 17:11:32 2014 +0200
@@ -0,0 +1,192 @@
+# -*- coding: utf-8 -*-
+'''
+Created on Jan 30, 2012
+
+@author: ymh
+'''
+
+from django.core.management.base import NoArgsCommand
+from django.core.management.color import no_style
+
+from optparse import make_option
+from django.conf import settings
+from django.db.models import Count
+from django.db import transaction
+from hdabo.models import Tag
+from hdalab.models import DbpediaFields, TagLinks
+from hdabo.utils import show_progress
+from rdflib.graph import Graph
+from rdflib import URIRef
+import re
+import traceback
+import sys
+from hdalab.models.dataviz import DbpediaFieldsTranslation
+from django import db
+
+class Command(NoArgsCommand):
+ '''
+ query and update wikipedia for tag title.
+ '''
+ options = ''
+ help = """query and update wikipedia for tag title."""
+
+ option_list = NoArgsCommand.option_list + (
+ make_option('--all',
+ action='store_true',
+ dest='all',
+ default=False,
+ help='force all tags to be updated, not only those not yet processed'),
+ make_option('--force',
+ action='store_true',
+ dest='force',
+ default=False,
+ help='ask no questions'),
+ make_option('--random',
+ action='store_true',
+ dest='random',
+ default=False,
+ help='randomize query on tags'),
+ make_option('--limit',
+ action='store',
+ type='int',
+ dest='limit',
+ default= -1,
+ help='number of tag to process'),
+ make_option('--start',
+ action='store',
+ type='int',
+ dest='start',
+ default=0,
+ help='number of tag to ignore'),
+ make_option('--tag',
+ action='append',
+ dest='tags',
+ type='string',
+ default=[],
+ help='the tag to query'),
+ )
+
+ def handle_noargs(self, **options):
+
+ self.style = no_style()
+
+ self.interactive = options.get('interactive', True)
+
+ self.verbosity = int(options.get('verbosity', '1'))
+
+ self.force = options.get('force', False)
+
+ self.limit = options.get("limit", -1)
+ self.start = options.get("start", 0)
+
+ self.random = options.get('random', False)
+
+ if self.verbosity > 2:
+ print "option passed : " + repr(options)
+
+ self.tag_list = options.get("tags", []);
+
+ queryset = Tag.objects.exclude(dbpedia_uri= None)
+
+
+ if self.tag_list:
+ queryset = queryset.filter(label__in=self.tag_list)
+ elif not options.get('all',False):
+ queryset = queryset.annotate(dbfc=Count('dbpedia_fields')).filter(dbfc = 0)
+
+ if self.random:
+ queryset = queryset.order_by("?")
+ else:
+ queryset = queryset.order_by("label")
+
+ if self.limit >= 0:
+ queryset = queryset[self.start:self.limit]
+ elif self.start > 0:
+ queryset = queryset[self.start:]
+
+ if self.verbosity > 2 :
+ print "Tag Query is %s" % (queryset.query)
+
+ count = queryset.count()
+
+ if count == 0:
+ print "No tag to query : exit."
+ return
+
+
+ if not self.force and self.interactive:
+ confirm = raw_input("You have requested to query and replace the dbpedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
+ else:
+ confirm = 'yes'
+
+ if confirm != "yes":
+ print "dbpedia query cancelled"
+ return
+
+ writer = None
+ for i,tag in enumerate(queryset):
+ writer = show_progress(i+1, count, tag.label, 50, writer)
+ db.reset_queries()
+ rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3"
+ g = Graph()
+ try :
+ g.parse(rdf_uri, format="n3")
+
+ with transaction.commit_on_success():
+
+ abstracts = {}
+ labels = {}
+ thumbnail = None
+ for t in g:
+ if t[1] == URIRef(u'http://dbpedia.org/ontology/abstract') and t[2] is not None \
+ and hasattr(t[2], 'language'):
+ abstracts[t[2].language] = unicode(t[2])
+ if t[1] == URIRef(u'http://www.w3.org/2000/01/rdf-schema#label') and t[2] is not None \
+ and hasattr(t[2], 'language'):
+ labels[t[2].language] = unicode(t[2])
+ if t[1] == URIRef(u'http://dbpedia.org/ontology/thumbnail') and t[2] is not None:
+ thumbnail = unicode(t[2])
+ if u'http://dbpedia.org/resource' in t[2]:
+ tagqs = Tag.objects.filter(dbpedia_uri=unicode(t[2]))
+ if tagqs:
+ TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])
+
+ ref_label = labels['fr'] if 'fr' in labels else labels['en'] if 'en' in labels else labels.values()[0] if len(labels) > 0 else tag.label
+ ref_abstract = abstracts['fr'] if 'fr' in abstracts else abstracts['en'] if 'en' in abstracts else abstracts.values()[0] if len(abstracts) > 0 else None
+ for lang in settings.LANGUAGES:
+ if lang[0] not in labels:
+ labels[lang[0]]= ref_label
+ if lang[0] not in abstracts:
+ abstracts[lang[0]] = ref_abstract
+
+ dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable
+ if not created:
+ dbfield.abstract = ref_abstract
+ dbfield.thumbnail = thumbnail
+ dbfield.label = ref_label
+ dbfield.save()
+ DbpediaFieldsTranslation.objects.filter(master=dbfield).delete()
+
+ consolidated_trans = {}
+ for lang,label in labels.iteritems():
+ consolidated_trans[lang] = [label,ref_abstract]
+ for lang,abstract in abstracts.iteritems():
+ if lang in consolidated_trans:
+ consolidated_trans[lang][1] = abstract
+ else:
+ consolidated_trans[lang] = [ref_label, abstract]
+
+ for lang, trans in consolidated_trans.iteritems():
+ label, abstract = tuple(trans)
+ DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label, abstract=abstract)
+
+
+ except Exception as e:
+ print "\nError processing resource %s : %s" %(rdf_uri,unicode(e))
+ traceback.print_exception(type(e), e, sys.exc_info()[2])
+
+
+
+
+
+