web/hdalab/management/commands/query_dbpedia.py
changeset 135 dd6578e36a57
parent 119 e3ebe3545f72
child 240 c8627191f2d7
--- a/web/hdalab/management/commands/query_dbpedia.py	Fri Mar 02 23:25:56 2012 +0100
+++ b/web/hdalab/management/commands/query_dbpedia.py	Sun Mar 11 03:27:09 2012 +0100
@@ -9,6 +9,7 @@
 from django.core.management.color import no_style
 
 from optparse import make_option
+from django.conf import settings
 from django.db.models import Count
 from django.db import transaction
 from hdabo.models import Tag
@@ -17,6 +18,10 @@
 from rdflib.graph import Graph
 from rdflib import URIRef
 import re
+import traceback
+import sys
+from hdalab.models.dataviz import DbpediaFieldsTranslation
+from django import db
 
 class Command(NoArgsCommand):
     '''
@@ -62,7 +67,7 @@
     )
     
     def handle_noargs(self, **options):
-        
+                
         self.style = no_style()
         
         self.interactive = options.get('interactive', True)
@@ -104,6 +109,11 @@
 
         count = queryset.count()
         
+        if count == 0:
+            print "No tag to query : exit."
+            return
+            
+            
         if not self.force and self.interactive:
             confirm = raw_input("You have requested to query and replace the dbpedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
         else:
@@ -116,7 +126,7 @@
         writer = None
         for i,tag in enumerate(queryset):
             writer = show_progress(i+1, count, tag.label, 50, writer)
-            
+            db.reset_queries()
             rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3"            
             g = Graph()
             try :
@@ -124,16 +134,16 @@
             
                 with transaction.commit_on_success():
                     
-                    abstract = None
-                    label = None
+                    abstracts = {}
+                    labels = {}
                     thumbnail = None
                     for t in g:
                         if t[1] == URIRef(u'http://dbpedia.org/ontology/abstract') and t[2] is not None \
-                            and hasattr(t[2], 'language') and (t[2].language == u"fr" or (abstract is None and t[2].language == u"en")):
-                            abstract = unicode(t[2])
+                            and hasattr(t[2], 'language'):
+                            abstracts[t[2].language] = unicode(t[2])
                         if t[1] ==  URIRef(u'http://www.w3.org/2000/01/rdf-schema#label') and t[2] is not None \
-                            and hasattr(t[2], 'language') and (t[2].language == u"fr" or (label is None and t[2].language == u"en")):
-                            label = unicode(t[2]) 
+                            and hasattr(t[2], 'language'):
+                            labels[t[2].language] = unicode(t[2]) 
                         if t[1] == URIRef(u'http://dbpedia.org/ontology/thumbnail') and t[2] is not None:
                             thumbnail = unicode(t[2])
                         if u'http://dbpedia.org/resource' in t[2]:
@@ -141,15 +151,40 @@
                             if tagqs:
                                 TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])                        
                     
-                    dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':abstract, 'label':label, 'thumbnail':thumbnail})
+                    ref_label = labels['fr'] if 'fr' in labels else labels['en'] if 'en' in labels else labels.values()[0] if len(labels) > 0 else tag.label
+                    ref_abstract = abstracts['fr'] if 'fr' in abstracts else abstracts['en'] if 'en' in abstracts else abstracts.values()[0] if len(abstracts) > 0 else None
+                    for lang in settings.LANGUAGES:
+                        if lang[0] not in labels:
+                            labels[lang[0]]= ref_label
+                        if lang[0] not in abstracts:
+                            abstracts[lang[0]] = ref_abstract
+
+                    dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable                    
                     if not created:
-                        dbfield.abstract = abstract
-                        dbfield.label = label
+                        dbfield.abstract = ref_abstract
                         dbfield.thumbnail = thumbnail
+                        dbfield.label = ref_label
                         dbfield.save()
+                        DbpediaFieldsTranslation.objects.filter(master=dbfield).delete()
+
+                    consolidated_trans = {}
+                    for lang,label in labels.iteritems():
+                        consolidated_trans[lang] = [label,ref_abstract]
+                    for lang,abstract in abstracts.iteritems():
+                        if lang in consolidated_trans:
+                            consolidated_trans[lang][1] = abstract
+                        else:
+                            consolidated_trans[lang] = [ref_label, abstract] 
+                         
+                    for lang, trans in consolidated_trans.iteritems():
+                        label, abstract = tuple(trans)
+                        DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label, abstract=abstract)
+                    
                     
             except Exception as e:
                 print "\nError processing resource %s : %s" %(rdf_uri,unicode(e))
+                traceback.print_exception(type(e), e, sys.exc_info()[2])
+