7 |
7 |
8 from django.core.management.base import NoArgsCommand |
8 from django.core.management.base import NoArgsCommand |
9 from django.core.management.color import no_style |
9 from django.core.management.color import no_style |
10 |
10 |
11 from optparse import make_option |
11 from optparse import make_option |
|
12 from django.conf import settings |
12 from django.db.models import Count |
13 from django.db.models import Count |
13 from django.db import transaction |
14 from django.db import transaction |
14 from hdabo.models import Tag |
15 from hdabo.models import Tag |
15 from hdalab.models import DbpediaFields, TagLinks |
16 from hdalab.models import DbpediaFields, TagLinks |
16 from hdabo.utils import show_progress |
17 from hdabo.utils import show_progress |
17 from rdflib.graph import Graph |
18 from rdflib.graph import Graph |
18 from rdflib import URIRef |
19 from rdflib import URIRef |
19 import re |
20 import re |
|
21 import traceback |
|
22 import sys |
|
23 from hdalab.models.dataviz import DbpediaFieldsTranslation |
|
24 from django import db |
20 |
25 |
21 class Command(NoArgsCommand): |
26 class Command(NoArgsCommand): |
22 ''' |
27 ''' |
23 query and update wikipedia for tag title. |
28 query and update wikipedia for tag title. |
24 ''' |
29 ''' |
114 return |
124 return |
115 |
125 |
116 writer = None |
126 writer = None |
117 for i,tag in enumerate(queryset): |
127 for i,tag in enumerate(queryset): |
118 writer = show_progress(i+1, count, tag.label, 50, writer) |
128 writer = show_progress(i+1, count, tag.label, 50, writer) |
119 |
129 db.reset_queries() |
120 rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3" |
130 rdf_uri = re.sub('\/resource\/', "/data/", tag.dbpedia_uri) + ".n3" |
121 g = Graph() |
131 g = Graph() |
122 try : |
132 try : |
123 g.parse(rdf_uri, format="n3") |
133 g.parse(rdf_uri, format="n3") |
124 |
134 |
125 with transaction.commit_on_success(): |
135 with transaction.commit_on_success(): |
126 |
136 |
127 abstract = None |
137 abstracts = {} |
128 label = None |
138 labels = {} |
129 thumbnail = None |
139 thumbnail = None |
130 for t in g: |
140 for t in g: |
131 if t[1] == URIRef(u'http://dbpedia.org/ontology/abstract') and t[2] is not None \ |
141 if t[1] == URIRef(u'http://dbpedia.org/ontology/abstract') and t[2] is not None \ |
132 and hasattr(t[2], 'language') and (t[2].language == u"fr" or (abstract is None and t[2].language == u"en")): |
142 and hasattr(t[2], 'language'): |
133 abstract = unicode(t[2]) |
143 abstracts[t[2].language] = unicode(t[2]) |
134 if t[1] == URIRef(u'http://www.w3.org/2000/01/rdf-schema#label') and t[2] is not None \ |
144 if t[1] == URIRef(u'http://www.w3.org/2000/01/rdf-schema#label') and t[2] is not None \ |
135 and hasattr(t[2], 'language') and (t[2].language == u"fr" or (label is None and t[2].language == u"en")): |
145 and hasattr(t[2], 'language'): |
136 label = unicode(t[2]) |
146 labels[t[2].language] = unicode(t[2]) |
137 if t[1] == URIRef(u'http://dbpedia.org/ontology/thumbnail') and t[2] is not None: |
147 if t[1] == URIRef(u'http://dbpedia.org/ontology/thumbnail') and t[2] is not None: |
138 thumbnail = unicode(t[2]) |
148 thumbnail = unicode(t[2]) |
139 if u'http://dbpedia.org/resource' in t[2]: |
149 if u'http://dbpedia.org/resource' in t[2]: |
140 tagqs = Tag.objects.filter(dbpedia_uri=unicode(t[2])) |
150 tagqs = Tag.objects.filter(dbpedia_uri=unicode(t[2])) |
141 if tagqs: |
151 if tagqs: |
142 TagLinks.objects.get_or_create(subject=tag, object=tagqs[0]) |
152 TagLinks.objects.get_or_create(subject=tag, object=tagqs[0]) |
143 |
153 |
144 dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':abstract, 'label':label, 'thumbnail':thumbnail}) |
154 ref_label = labels['fr'] if 'fr' in labels else labels['en'] if 'en' in labels else labels.values()[0] if len(labels) > 0 else tag.label |
|
155 ref_abstract = abstracts['fr'] if 'fr' in abstracts else abstracts['en'] if 'en' in abstracts else abstracts.values()[0] if len(abstracts) > 0 else None |
|
156 for lang in settings.LANGUAGES: |
|
157 if lang[0] not in labels: |
|
158 labels[lang[0]]= ref_label |
|
159 if lang[0] not in abstracts: |
|
160 abstracts[lang[0]] = ref_abstract |
|
161 |
|
162 dbfield , created = DbpediaFields.objects.get_or_create(dbpedia_uri=tag.dbpedia_uri,tag=tag, defaults={'abstract':ref_abstract, 'thumbnail':thumbnail, 'label':ref_label}) #@UndefinedVariable |
145 if not created: |
163 if not created: |
146 dbfield.abstract = abstract |
164 dbfield.abstract = ref_abstract |
147 dbfield.label = label |
|
148 dbfield.thumbnail = thumbnail |
165 dbfield.thumbnail = thumbnail |
|
166 dbfield.label = ref_label |
149 dbfield.save() |
167 dbfield.save() |
|
168 DbpediaFieldsTranslation.objects.filter(master=dbfield).delete() |
|
169 |
|
170 consolidated_trans = {} |
|
171 for lang,label in labels.iteritems(): |
|
172 consolidated_trans[lang] = [label,ref_abstract] |
|
173 for lang,abstract in abstracts.iteritems(): |
|
174 if lang in consolidated_trans: |
|
175 consolidated_trans[lang][1] = abstract |
|
176 else: |
|
177 consolidated_trans[lang] = [ref_label, abstract] |
|
178 |
|
179 for lang, trans in consolidated_trans.iteritems(): |
|
180 label, abstract = tuple(trans) |
|
181 DbpediaFieldsTranslation.objects.create(master=dbfield, language_code=lang, label=label, abstract=abstract) |
|
182 |
150 |
183 |
151 except Exception as e: |
184 except Exception as e: |
152 print "\nError processing resource %s : %s" %(rdf_uri,unicode(e)) |
185 print "\nError processing resource %s : %s" %(rdf_uri,unicode(e)) |
|
186 traceback.print_exception(type(e), e, sys.exc_info()[2]) |
|
187 |
153 |
188 |
154 |
189 |
155 |
190 |
156 |
191 |
157 |
192 |