|
1 # -*- coding: utf-8 -*- |
|
2 ''' |
|
3 Created on Jan 30, 2012 |
|
4 |
|
5 @author: ymh |
|
6 ''' |
|
7 |
|
8 from django.core.management.base import NoArgsCommand |
|
9 from django.core.management.color import no_style |
|
10 |
|
11 from optparse import make_option |
|
12 from django.db.models import Count |
|
13 from django.db import transaction |
|
14 from hdabo.models import Tag |
|
15 from hdalab.models import DbpediaFields, TagLinks |
|
16 from hdabo.utils import show_progress |
|
17 from rdflib.graph import Graph |
|
18 from rdflib import URIRef |
|
19 import re |
|
20 |
|
class Command(NoArgsCommand):
    '''
    Query DBpedia for tags and store the result locally.

    For every selected tag that has a ``dbpedia_uri``, the N3 description
    of the resource is downloaded and parsed. The abstract, label and
    thumbnail found in it are saved in a ``DbpediaFields`` row, and a
    ``TagLinks`` row is created for every other known tag whose
    ``dbpedia_uri`` appears as the object of a triple.
    '''
    options = ''
    help = """query and update wikipedia for tag title."""

    option_list = NoArgsCommand.option_list + (
        make_option('--all',
            action='store_true',
            dest='all',
            default=False,
            help='force all tags to be updated, not only those not yet processed'),
        make_option('--force',
            action='store_true',
            dest='force',
            default=False,
            help='ask no questions'),
        make_option('--random',
            action='store_true',
            dest='random',
            default=False,
            help='randomize query on tags'),
        make_option('--limit',
            action='store',
            type='int',
            dest='limit',
            default=-1,
            help='number of tag to process'),
        make_option('--start',
            action='store',
            type='int',
            dest='start',
            default=0,
            help='number of tag to ignore'),
        make_option('--tag',
            action='append',
            dest='tags',
            type='string',
            default=[],
            help='the tag to query'),
    )

    # Predicates looked up in every downloaded graph; built once instead of
    # once per triple.
    _ABSTRACT_PREDICATE = URIRef(u'http://dbpedia.org/ontology/abstract')
    _LABEL_PREDICATE = URIRef(u'http://www.w3.org/2000/01/rdf-schema#label')
    _THUMBNAIL_PREDICATE = URIRef(u'http://dbpedia.org/ontology/thumbnail')
    # Objects containing this text are looked up among the known tags.
    _RESOURCE_MARKER = u'http://dbpedia.org/resource'

    def handle_noargs(self, **options):
        '''
        Entry point of the command.

        Reads the command line options, selects the tags to process, asks
        for confirmation (unless ``--force`` is given or the command is
        not interactive) and then processes the tags one by one.
        '''
        self.style = no_style()
        self.interactive = options.get('interactive', True)
        self.verbosity = int(options.get('verbosity', '1'))
        self.force = options.get('force', False)
        self.limit = options.get("limit", -1)
        self.start = options.get("start", 0)
        self.random = options.get('random', False)

        if self.verbosity > 2:
            print("option passed : " + repr(options))

        self.tag_list = options.get("tags", [])

        queryset = self._build_queryset(options)

        if self.verbosity > 2:
            print("Tag Query is %s" % (queryset.query,))

        count = queryset.count()

        if not self.force and self.interactive:
            confirm = raw_input("You have requested to query and replace the dbpedia information for %d tags.\n Are you sure you want to do this? \nType 'yes' to continue, or 'no' to cancel: " % (count))
        else:
            confirm = 'yes'

        if confirm != "yes":
            print("dbpedia query cancelled")
            return

        writer = None
        for i, tag in enumerate(queryset):
            writer = show_progress(i + 1, count, tag.label, 50, writer)
            self._process_tag(tag)

    def _build_queryset(self, options):
        '''
        Return the queryset of tags selected by the command line options.

        Only tags with a ``dbpedia_uri`` are considered. ``--tag`` restricts
        the selection to the given labels; otherwise, unless ``--all`` is
        given, only tags without any ``dbpedia_fields`` are kept.
        '''
        queryset = Tag.objects.exclude(dbpedia_uri=None)

        if self.tag_list:
            queryset = queryset.filter(label__in=self.tag_list)
        elif not options.get('all', False):
            queryset = queryset.annotate(dbfc=Count('dbpedia_fields')).filter(dbfc=0)

        if self.random:
            queryset = queryset.order_by("?")
        else:
            queryset = queryset.order_by("label")

        if self.limit >= 0:
            # --limit is a number of tags to process, not an end index: the
            # slice must end at start + limit, otherwise combining --start
            # with --limit silently shrinks (or empties) the selection.
            queryset = queryset[self.start:self.start + self.limit]
        elif self.start > 0:
            queryset = queryset[self.start:]

        return queryset

    @staticmethod
    def _pick_localized(current, candidate):
        '''
        Return the value to keep for a localized field.

        A French ``candidate`` always replaces ``current``; an English one
        is only used while nothing has been found yet. Candidates without
        a ``language`` attribute are ignored.
        '''
        language = getattr(candidate, 'language', None)
        if language == u"fr" or (current is None and language == u"en"):
            return unicode(candidate)
        return current

    def _process_tag(self, tag):
        '''
        Download the DBpedia description of ``tag`` and store it.

        Any error raised while fetching, parsing or saving is reported on
        standard output and swallowed, so that one failing resource does
        not interrupt the processing of the remaining tags.
        '''
        # The N3 serialization lives under /data/ instead of /resource/.
        rdf_uri = re.sub(r'/resource/', "/data/", tag.dbpedia_uri) + ".n3"
        g = Graph()
        try:
            g.parse(rdf_uri, format="n3")

            # All database writes for one tag are committed together.
            with transaction.commit_on_success():

                abstract = None
                label = None
                thumbnail = None
                for _subject, predicate, obj in g:
                    if predicate == self._ABSTRACT_PREDICATE:
                        abstract = self._pick_localized(abstract, obj)
                    if predicate == self._LABEL_PREDICATE:
                        label = self._pick_localized(label, obj)
                    if predicate == self._THUMBNAIL_PREDICATE and obj is not None:
                        thumbnail = unicode(obj)
                    # Checked for every triple, whatever its predicate.
                    if self._RESOURCE_MARKER in obj:
                        tagqs = Tag.objects.filter(dbpedia_uri=unicode(obj))
                        if tagqs:
                            TagLinks.objects.get_or_create(subject=tag, object=tagqs[0])

                dbfield, created = DbpediaFields.objects.get_or_create(
                    dbpedia_uri=tag.dbpedia_uri,
                    tag=tag,
                    defaults={'abstract': abstract, 'label': label, 'thumbnail': thumbnail})
                if not created:
                    # Existing row: overwrite it with the freshly fetched values.
                    dbfield.abstract = abstract
                    dbfield.label = label
                    dbfield.thumbnail = thumbnail
                    dbfield.save()

        except Exception as e:
            print("\nError processing resource %s : %s" % (rdf_uri, unicode(e)))
|
153 |
|
154 |
|
155 |
|
156 |
|
157 |