|
107
|
1 |
# -*- coding: utf-8 -*- |
|
126
|
2 |
# |
|
131
|
3 |
# Copyright IRI (c) 2013 |
|
126
|
4 |
# |
|
|
5 |
# contact@iri.centrepompidou.fr |
|
|
6 |
# |
|
|
7 |
# This software is governed by the CeCILL-B license under French law and |
|
|
8 |
# abiding by the rules of distribution of free software. You can use, |
|
|
9 |
# modify and/ or redistribute the software under the terms of the CeCILL-B |
|
|
10 |
# license as circulated by CEA, CNRS and INRIA at the following URL |
|
|
11 |
# "http://www.cecill.info". |
|
|
12 |
# |
|
|
13 |
# As a counterpart to the access to the source code and rights to copy, |
|
|
14 |
# modify and redistribute granted by the license, users are provided only |
|
|
15 |
# with a limited warranty and the software's author, the holder of the |
|
|
16 |
# economic rights, and the successive licensors have only limited |
|
|
17 |
# liability. |
|
|
18 |
# |
|
|
19 |
# In this respect, the user's attention is drawn to the risks associated |
|
|
20 |
# with loading, using, modifying and/or developing or reproducing the |
|
|
21 |
# software by the user in light of its specific status of free software, |
|
|
22 |
# that may mean that it is complicated to manipulate, and that also |
|
|
23 |
# therefore means that it is reserved for developers and experienced |
|
|
24 |
# professionals having in-depth computer knowledge. Users are therefore |
|
|
25 |
# encouraged to load and test the software's suitability as regards their |
|
|
26 |
# requirements in conditions enabling the security of their systems and/or |
|
|
27 |
# data to be ensured and, more generally, to use and operate it in the |
|
|
28 |
# same conditions as regards security. |
|
|
29 |
# |
|
|
30 |
# The fact that you are presently reading this means that you have had |
|
|
31 |
# knowledge of the CeCILL-B license and that you accept its terms. |
|
|
32 |
# |
|
107
|
33 |
|
|
|
34 |
from rdflib.plugins.sparql.processor import prepareQuery |
|
|
35 |
from rdflib.term import URIRef |
|
|
36 |
from p4l.models.data import Language, Record |
|
119
|
37 |
from p4l.models import signals |
|
107
|
38 |
|
|
|
39 |
|
|
|
40 |
class QueryCache(object):
    """Keep prepared SPARQL queries so each query text is prepared only once.

    Entries are keyed by the query text alone; the namespaces passed along
    with a query are only used the first time that text is prepared.
    """

    def __init__(self, *args, **kwargs):
        # Extra positional/keyword arguments are accepted and ignored.
        self.__query_cache = {}

    def get_sparql_query(self, query, namespaces_dict):
        """Return the prepared form of ``query``, preparing it on first use.

        :param query: SPARQL query text, also used as the cache key.
        :param namespaces_dict: prefix -> namespace mapping handed to
            ``prepareQuery`` as ``initNs`` when the query is not cached yet.
        :returns: the cached (or freshly prepared and stored) query object.
        """
        prepared = self.__query_cache.get(query, False)
        if prepared:
            return prepared
        # Not cached yet (or cached value is falsy): prepare and store it.
        return self.__query_cache.setdefault(
            query, prepareQuery(query, initNs=namespaces_dict))
|
|
47 |
|
|
|
48 |
|
|
|
49 |
def convert_bool(val):
    """Interpret ``val`` as a boolean flag.

    * ``None`` gives ``False``.
    * A value comparing equal to ``True`` or ``False`` is returned unchanged.
    * A string is ``True`` only when it is non-empty and its first character,
      lower-cased, is one of ``t``, ``y``, ``1`` or ``o``.
    * Anything else goes through ``bool()``.
    """
    if val is None:
        return False
    if val == True or val == False:
        # Returned as-is, so e.g. 1 and 0 come back as 1 and 0.
        return val
    if not isinstance(val, basestring):
        return bool(val)
    # String case: only the first character is inspected.
    return len(val) > 0 and val[0].lower() in ['t','y','1','o']
|
|
62 |
|
|
|
63 |
def _unicode_or_none(value):
    """Return ``unicode(value)``, letting ``None`` pass through unchanged."""
    return unicode(value) if value is not None else None


class RecordParser(object):
    """Build ``Record`` model instances from an RDF graph via SPARQL queries."""

    def __init__(self, query_cache=None):
        """Create a parser.

        :param query_cache: object exposing ``get_sparql_query(query,
            namespaces_dict)``; a new ``QueryCache`` is created when omitted.
        """
        # Honour the caller-supplied cache (it used to be silently discarded).
        self.query_cache = query_cache
        if self.query_cache is None:
            self.query_cache = QueryCache()

    def extract_single_value_form_graph(self, graph, q, bindings=None, index=0, convert=_unicode_or_none, default=None):
        """Return the first value produced by ``q``, or ``default`` if none.

        Parameters have the same meaning as in
        ``extract_multiple_values_from_graph``.
        """
        return next(self.extract_multiple_values_from_graph(graph, q, bindings, index, convert), default)

    def extract_multiple_values_from_graph(self, graph, q, bindings=None, index=0, convert=_unicode_or_none):
        """Run ``q`` against ``graph`` and yield one converted value per row.

        :param graph: object providing ``namespaces()`` and
            ``query(prepared, initBindings=...)``.
        :param q: SPARQL query text (prepared through ``self.query_cache``).
        :param bindings: initial variable bindings; ``None`` means no bindings.
        :param index: either an int (the row column to yield; columns
            ``0..index`` are converted) or a sequence of keys that are paired,
            in order, with the row columns, in which case a dict is yielded.
        :param convert: a callable applied to every column, or a mapping
            key -> callable; keys without a callable fall back to the
            unicode-or-None conversion.

        Iteration stops at the first row that has fewer columns than keys.
        """
        if bindings is None:
            bindings = {}

        single_column = isinstance(index, int)
        index_list = range(index + 1) if single_column else index

        if callable(convert):
            convert_dict = dict((k, convert) for k in index_list)
        else:
            convert_dict = convert

        # Replace any non-callable entry by the default conversion.
        convert_dict = dict(
            (k, f if callable(f) else _unicode_or_none)
            for k, f in convert_dict.items()
        )

        prepared = self.query_cache.get_sparql_query(q, dict(graph.namespaces()))
        for row in graph.query(prepared, initBindings=bindings):
            if len(row) < len(index_list):
                break
            res = dict(
                (k, convert_dict.get(k, _unicode_or_none)(v))
                for k, v in zip(index_list, row)
            )
            if single_column:
                yield res[index]
            else:
                yield res

    def convert_bool(self, val):
        """Interpret ``val`` as a boolean flag.

        ``None`` gives ``False``; a value equal to ``True``/``False`` is
        returned unchanged; a string is ``True`` only when non-empty and
        starting (case-insensitively) with ``t``, ``y``, ``1`` or ``o``;
        anything else goes through ``bool()``.
        """
        if val == True or val == False:
            return val
        if val is None:
            return False
        if isinstance(val, basestring):
            if len(val) == 0:
                return False
            return val[0].lower() in ['t','y','1','o']
        return bool(val)

    def add_to_related_collection(self, coll, graph, fields, q, bindings=None, convert=_unicode_or_none, through_fields=None):
        """Create one related object per result row of ``q`` and attach it to ``coll``.

        :param coll: related manager of the record being built.
        :param graph: graph to query.
        :param fields: keys paired, in order, with the columns selected by ``q``.
        :param q: SPARQL query text.
        :param bindings: initial variable bindings for the query.
        :param convert: callable or mapping key -> callable (see
            ``extract_multiple_values_from_graph``).
        :param through_fields: keys that belong to the intermediate
            (``through``) model rather than to the related model.
        """
        for val in self.extract_multiple_values_from_graph(graph, q, bindings=bindings, index=fields, convert=convert):

            if through_fields:
                new_obj_val = dict((k, v) for k, v in val.items() if k not in through_fields)
            else:
                new_obj_val = val

            if hasattr(coll, 'through'):
                new_obj_rel, _ = coll.model.objects.get_or_create(**new_obj_val)
                if through_fields:
                    # The link row carries extra values, so it is created
                    # explicitly and nothing is left to add to the manager.
                    through_vals = {coll.source_field_name: coll.instance, coll.target_field_name: new_obj_rel}
                    through_vals.update(dict((k, v) for k, v in val.items() if k in through_fields))
                    coll.through.objects.create(**through_vals)
                    new_obj = None
                else:
                    new_obj = new_obj_rel
            else:
                new_obj = coll.create(**new_obj_val)

            if new_obj:
                coll.add(new_obj)

    def build_record(self, graph, delete=True):
        """Create, save and return a ``Record`` built from ``graph``.

        The record URI is the first subject typed ``iiep:Record``. Scalar
        fields are read first, the record is saved, then every related
        collection is filled, and finally ``signals.record_saved`` is sent
        with ``created=True``.

        :param graph: RDF graph describing the record.
        :param delete: when true, existing records with the same identifier
            are deleted before the new one is created.
        :returns: the saved ``Record``.
        """
        # NOTE(review): if the graph holds no iiep:Record, record_uri is None
        # and URIRef(None) is attempted below -- confirm the desired handling.
        record_uri = self.extract_single_value_form_graph(graph, "SELECT DISTINCT ?s WHERE { ?s rdf:type iiep:Record .}")
        # Bindings restricting ?s to the record; only read, never modified here.
        subject = {'s': URIRef(record_uri)}

        def scalar(predicate, **kwargs):
            # First object of `<record> predicate ?o`, converted per kwargs.
            return self.extract_single_value_form_graph(
                graph,
                "SELECT DISTINCT ?o WHERE { ?s %s ?o .}" % predicate,
                bindings=subject,
                **kwargs
            )

        def int_or_none(value):
            return int(value) if value is not None else None

        year_convert = {'meetingYear': int_or_none}
        related = self.add_to_related_collection

        record_identifier = scalar("dct:identifier")

        if delete:
            Record.objects.filter(identifier=record_identifier).delete()

        record = Record()
        record.uri = record_uri
        record.identifier = record_identifier
        record.notes = scalar("iiep:notes")
        record.recordType = scalar("dct:type")
        record.isDocumentPart = scalar("iiep:isDocumentPart", convert=self.convert_bool, default=False)
        record.hidden = scalar("iiep:hidden", convert=self.convert_bool, default=False)
        record.restricted = scalar("iiep:restricted", convert=self.convert_bool, default=False)
        record.editionStatement = scalar("iiep:editionStatement")
        record.corporateAuthorLabel = scalar("iiep:corporateAuthorLabel")

        language = scalar("dct:language")
        if language:
            record.language, _ = Language.objects.get_or_create(uri=language)

        # The record is saved before the related collections are filled.
        record.save()

        related(record.otherLanguages, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:otherLanguage ?o .}", bindings=subject)
        related(record.subjects, graph, ['uri'], "SELECT ?o WHERE { ?s dct:subject ?o .}", bindings=subject)
        related(record.themes, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:theme ?o .}", bindings=subject)
        related(record.countries, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:country ?o .}", bindings=subject)
        related(record.authors, graph, ['name'], "SELECT ?o WHERE { ?s iiep:author ?o .}", bindings=subject)
        related(record.subjectPersons, graph, ['name'], "SELECT ?o WHERE { ?s iiep:subjectPerson ?o .}", bindings=subject)
        # NOTE(review): unlike its siblings this query does not bind ?s to the
        # record -- confirm whether that is intentional.
        related(record.projectNames, graph, ['uri'], "SELECT ?o WHERE { ?s iiep:projectName ?o . }")
        related(record.audiences, graph, ['uri'], "SELECT ?o WHERE { ?s dct:audience ?o .}", bindings=subject)

        related(
            record.periodicals,
            graph,
            ['label', 'lang'],
            "SELECT DISTINCT ?o ( lang(?o) as ?l) WHERE { ?s iiep:periodical ?o .}",
            bindings=subject
        )

        # COALESCE now lists ?mn (the selected meeting number); it previously
        # named ?nm, a variable that is never bound.
        related(
            record.meetings,
            graph,
            ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear', 'lang'],
            "SELECT ?l ?mn ?mp ?md ?my (lang(COALESCE(?l,?mn, ?mp,?md,?my)) as ?lang) WHERE { [iiep:meeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
            convert=year_convert
        )

        related(
            record.series,
            graph,
            ['title', 'volume', 'lang'],
            "SELECT ?t ?vol (lang(COALESCE(?t,?vol)) as ?lang) WHERE { [iiep:serie ?bnode]. OPTIONAL { ?bnode dct:title ?t }. OPTIONAL { ?bnode iiep:volume ?vol } }"
        )

        related(
            record.subjectCorporateBodies,
            graph,
            ['uri'],
            "SELECT ?o WHERE { ?s iiep:subjectCorporateBody ?o. }",
            bindings=subject
        )

        related(
            record.subjectMeetings,
            graph,
            ['label', 'meetingNumber', 'meetingPlace', 'meetingDate', 'meetingYear'],
            "SELECT ?l ?mn ?mp ?md ?my WHERE { [iiep:subjectMeeting ?bnode]. OPTIONAL { ?bnode rdfs:label ?l }. OPTIONAL { ?bnode iiep:meetingNumber ?mn }. OPTIONAL { ?bnode iiep:meetingPlace ?mp }. OPTIONAL { ?bnode iiep:meetingDate ?md }. OPTIONAL { ?bnode iiep:meetingYear ?my }}",
            convert=year_convert
        )

        related(
            record.corporateAuthors,
            graph,
            ['uri'],
            "SELECT ?o WHERE { ?s iiep:corporateAuthor ?o.}",
            bindings=subject
        )

        related(
            record.issns,
            graph,
            ['issn', 'lang'],
            "SELECT ?issn (lang(COALESCE(?issn)) as ?lang) WHERE { ?s iiep:issn ?issn . }",
            bindings=subject
        )

        related(
            record.isbns,
            graph,
            ['isbn', 'lang'],
            "SELECT ?isbn (lang(COALESCE(?isbn)) as ?lang) WHERE { ?s iiep:isbn ?isbn . }",
            bindings=subject
        )

        related(
            record.documentCodes,
            graph,
            ['documentCode', 'lang'],
            "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:documentCode ?c . }",
            bindings=subject
        )

        related(
            record.titles,
            graph,
            ['title', 'lang'],
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:title ?t . }",
            bindings=subject
        )

        related(
            record.abstracts,
            graph,
            ['abstract', 'lang'],
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s dct:abstract ?t . }",
            bindings=subject
        )

        related(
            record.addedTitles,
            graph,
            ['title', 'lang'],
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:addedTitle ?t . }",
            bindings=subject
        )

        related(
            record.titlesMainDocument,
            graph,
            ['title', 'lang'],
            "SELECT ?t (lang(COALESCE(?t)) as ?lang) WHERE { ?s iiep:titleMainDocument ?t . }",
            bindings=subject
        )

        related(
            record.imprints,
            graph,
            ['imprintCity', 'publisher', 'imprintDate', 'lang'],
            "SELECT ?c ?p ?d (lang(COALESCE(?c, ?p, ?d)) as ?lang) WHERE { [ iiep:imprint ?bnode ]. OPTIONAL { ?bnode iiep:imprintCity ?c }. OPTIONAL { ?bnode dct:publisher ?p }. OPTIONAL { ?bnode iiep:imprintDate ?d }}"
        )

        related(
            record.collations,
            graph,
            ['collation', 'lang'],
            "SELECT ?c (lang(COALESCE(?c)) as ?lang) WHERE { ?s iiep:collation ?c . }",
            bindings=subject
        )

        related(
            record.volumeIssues,
            graph,
            ['volume', 'number', 'lang'],
            "SELECT ?v ?n (lang(COALESCE(?v, ?n)) as ?lang) WHERE { [ iiep:volumeIssue ?bnode ]. OPTIONAL { ?bnode iiep:volume ?v }. OPTIONAL { ?bnode iiep:number ?n }}"
        )

        related(
            record.urls,
            graph,
            ['address', 'display'],
            "SELECT ?a ?d WHERE { [ iiep:url ?bnode ]. OPTIONAL { ?bnode iiep:address ?a }. OPTIONAL { ?bnode iiep:display ?d }.}"
        )

        signals.record_saved.send(Record, instance=record, created=True)

        return record