diff -r 75f8f05f9a60 -r dd6578e36a57 web/hdalab/management/commands/query_dbpedia.py --- a/web/hdalab/management/commands/query_dbpedia.py Fri Mar 02 23:25:56 2012 +0100 +++ b/web/hdalab/management/commands/query_dbpedia.py Sun Mar 11 03:27:09 2012 +0100 @@ -9,6 +9,7 @@ from django.core.management.color import no_style from optparse import make_option +from django.conf import settings from django.db.models import Count from django.db import transaction from hdabo.models import Tag @@ -17,6 +18,10 @@ from rdflib.graph import Graph from rdflib import URIRef import re +import traceback +import sys +from hdalab.models.dataviz import DbpediaFieldsTranslation +from django import db class Command(NoArgsCommand): ''' @@ -62,7 +67,7 @@ ) def handle_noargs(self, **options): - + self.style = no_style() self.interactive = options.get('interactive', True) @@ -104,6 +109,11 @@ count = queryset.count() + if count == 0: + print "No tag to query : exit." + return + + if not self.force and self.interactive: confirm = raw_input("You have requested to query and replace the dbpedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count)) else: @@ -116,7 +126,7 @@ writer = None for i,tag in enumerate(queryset): writer = show_progress(i+1, count, tag.label, 50, writer) - + db.reset_queries() rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3" g = Graph() try : @@ -124,16 +134,16 @@ with transaction.commit_on_success(): - abstract = None - label = None + abstracts = {} + labels = {} thumbnail = None for t in g: if t[1] == URIRef(u'http://dbpedia.org/ontology/abstract') and t[2] is not None \ - and hasattr(t[2], 'language') and (t[2].language == u"fr" or (abstract is None and t[2].language == u"en")): - abstract = unicode(t[2]) + and hasattr(t[2], 'language'): + abstracts[t[2].language] = unicode(t[2]) if t[1] == URIRef(u'http://www.w3.org/2000/01/rdf-schema#label') and t[2] is not None \ - and hasattr(t[2], 'language') and (t[2].language == u"fr" or (label is None and t[2].language == u"en")): - label = unicode(t[2]) + and hasattr(t[2], 'language'): + labels[t[2].language] = unicode(t[2]) if t[1] == URIRef(u'http://dbpedia.org/ontology/thumbnail') and t[2] is not None: thumbnail = unicode(t[2]) if u'http://dbpedia.org/resource' in t[2]: @@ -141,15 +151,40 @@ if tagqs: TagLinks.objects.get_or_create(subject=tag, object=tagqs[0]) - dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':abstract, 'label':label, 'thumbnail':thumbnail}) + ref_label = labels['fr'] if 'fr' in labels else labels['en'] if 'en' in labels else labels.values()[0] if len(labels) > 0 else tag.label + ref_abstract = abstracts['fr'] if 'fr' in abstracts else abstracts['en'] if 'en' in abstracts else abstracts.values()[0] if len(abstracts) > 0 else None + for lang in settings.LANGUAGES: + if lang[0] not in labels: + labels[lang[0]]= ref_label + if lang[0] not in abstracts: + abstracts[lang[0]] = ref_abstract + + dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable if not created: - dbfield.abstract = abstract - dbfield.label = label + dbfield.abstract = ref_abstract dbfield.thumbnail = thumbnail + dbfield.label = ref_label dbfield.save() + DbpediaFieldsTranslation.objects.filter(master=dbfield).delete() + + consolidated_trans = {} + for lang,label in labels.iteritems(): + consolidated_trans[lang] = [label,ref_abstract] + for lang,abstract in abstracts.iteritems(): + if lang in consolidated_trans: + consolidated_trans[lang][1] = abstract + else: + consolidated_trans[lang] = [ref_label, abstract] + + for lang, trans in consolidated_trans.iteritems(): + label, abstract = tuple(trans) + DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label, abstract=abstract) + except Exception as e: print "\nError processing resource %s : %s" %(rdf_uri,unicode(e)) + traceback.print_exception(type(e), e, sys.exc_info()[2]) +