 * Implement the DocumentRepository using EasyRdf.
 * TODO: split the transaction management (add, delete, transaction handling) out into an external class, most likely by extending the SPARQL client.
 */
25 class RdfDocumentRepository implements DocumentRepository { |
25 class RdfDocumentRepository implements DocumentRepository { |
26 |
26 |
/**
 * SPARQL query selecting, for every graph holding an edm:ProvidedCHO, the
 * graph URI, the document resource, its title and the optional language,
 * issued/modified dates and publishers (concatenated with ", ").
 *
 * The query ends with ORDER BY so that callers may append OFFSET / LIMIT
 * clauses directly after it.
 */
const BASE_DOC_QUERY
    = "SELECT".
    " ?uri".
    " ?doc".
    " ?title".
    " ?issued".
    " ?modified".
    " ?lang".
    " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
    " WHERE {".
    " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
    " ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
    " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
    " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
    " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
    " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
    " } ".
    " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
    " ORDER BY ?uri";
46 |
46 |
|
/**
 * Complementary SPARQL queries run after BASE_DOC_QUERY.
 *
 * Each entry is a sprintf() template with a single %s placeholder placed
 * inside the WHERE group; the caller substitutes a FILTER restricting ?uri
 * to the graphs returned by the base query.
 *
 *  - first query: one dcterms:extent value per document (as ?extent),
 *    read from the edm:WebResource referenced through edm:isShownBy;
 *  - second query: one edm:WebResource URI per document (as ?transcript_url)
 *    among those whose dc:format is "application/xml" or "application/pdf".
 */
const ADDITIONAL_DOC_QUERIES = [
    "SELECT".
    " ?uri".
    " ?doc".
    " (sample(distinct ?ext) as ?extent) ".
    "WHERE {".
    " GRAPH ?uri {".
    " ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
    " ?uri <http://www.europeana.eu/schemas/edm/isShownBy> ?s. ".
    " ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
    " OPTIONAL {?s <http://purl.org/dc/terms/extent> ?ext.}".
    " }. ".
    " %s".
    "} ".
    "GROUP BY ?uri ?doc",

    "SELECT".
    " ?uri".
    " ?doc".
    " (sample(distinct str(?s)) as ?transcript_url) ".
    "WHERE {".
    " GRAPH ?uri {".
    " ?s a <http://www.europeana.eu/schemas/edm/WebResource>. ".
    " ?uri <http://www.europeana.eu/schemas/edm/aggregatedCHO> ?doc. ".
    " OPTIONAL {?s <http://purl.org/dc/elements/1.1/format> ?f.} ".
    " }. ".
    " FILTER(str(?f) IN ( \"application/xml\", \"application/pdf\" )). ".
    " %s".
    "} ".
    "GROUP BY ?uri ?doc"
];
62 |
78 |
// Client used to run every SPARQL query and update issued by this repository.
private $sparqlClient;
// NOTE(review): presumably resolves Lexvo language identifiers - not used in the visible code, confirm.
private $lexvoResolver;
65 |
81 |
/**
 * Accessor for the SPARQL client used by this repository.
 *
 * @return mixed the value held in $this->sparqlClient
 */
public function getSparqlClient() {
    return $this->sparqlClient;
}
74 |
90 |
75 private function getResGraph($doc) { |
91 private function getResGraph($doc) { |
|
92 |
|
93 if(empty((array)$doc)) { |
|
94 return null; |
|
95 } |
|
96 |
76 $newGraph = new Graph($doc->uri->getUri()); |
97 $newGraph = new Graph($doc->uri->getUri()); |
77 $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation")); |
98 $newGraph->add($doc->uri, "rdf:type", $newGraph->resource("http://www.openarchives.org/ore/terms/Aggregation")); |
78 $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc); |
99 $newGraph->add($doc->uri, "http://www.europeana.eu/schemas/edm/aggregatedCHO", $doc->doc); |
79 $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO")); |
100 $newGraph->add($doc->doc, "rdf:type", $newGraph->resource("http://www.europeana.eu/schemas/edm/ProvidedCHO")); |
80 if(isset($doc->title)) { |
101 if(isset($doc->title)) { |
93 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers); |
114 $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/publisher", $doc->publishers); |
94 } |
115 } |
95 if(isset($doc->extent)) { |
116 if(isset($doc->extent)) { |
96 $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent); |
117 $newGraph->add($doc->doc, "http://purl.org/dc/terms/extent", $doc->extent); |
97 } |
118 } |
|
119 if(isset($doc->transcript_url)) { |
|
120 $newGraph->add($doc->doc, config('corpusparole.corpus_ontology_url').'transcript', $doc->transcript_url); |
|
121 } |
98 return $newGraph; |
122 return $newGraph; |
99 } |
123 } |
100 |
124 |
/**
 * Query the documents and build one graph per document.
 *
 * BASE_DOC_QUERY is run first, optionally windowed with OFFSET / LIMIT.
 * Each query of ADDITIONAL_DOC_QUERIES is then run, restricted to the graph
 * URIs found by the base query, and its result is merged into the graph of
 * the matching document.
 *
 * @param int|null $offset number of base-query rows to skip; null adds no OFFSET clause
 * @param int|null $limit  maximum number of base-query rows; null adds no LIMIT clause
 * @return array list of DocumentResult; empty when the base query yields no usable row
 */
private function queryDocs($offset=null, $limit=null) {

    $resDocs = [];
    $limitsClauses = [];
    $limitsClausesStr = "";

    // The values are spliced into the query text: force them to integers so
    // that nothing but a number can ever reach the SPARQL endpoint.
    if(!is_null($offset)) {
        $limitsClauses[] = "OFFSET ".(int)$offset;
    }
    if(!is_null($limit)) {
        $limitsClauses[] = "LIMIT ".(int)$limit;
    }
    if(!empty($limitsClauses)) {
        $limitsClausesStr = "\n" . join(" ", $limitsClauses);
    }

    $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
    foreach($docs as $doc) {
        $graph = $this->getResGraph($doc);
        // getResGraph() returns null for an empty result row: skip it
        if(is_null($graph)) {
            continue;
        }
        $resDocs[$doc->uri->getUri()] = $graph;
    }

    // Nothing found: no need to run the complementary queries
    if(empty($resDocs)) {
        return [];
    }

    // Restrict the complementary queries to the documents of the current window
    $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";

    foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
        $docs = $this->sparqlClient->query(sprintf($query, $filterUris));
        foreach($docs as $doc) {
            $graph = $this->getResGraph($doc);
            if(is_null($graph)) {
                continue;
            }

            $uri = $doc->uri->getUri();
            if(array_key_exists($uri, $resDocs)) {
                $resDocs[$uri] = Utils::mergeGraphs($resDocs[$uri], $graph);
            } else {
                $resDocs[$uri] = $graph;
            }
        }
    }

    return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
}
122 |
176 |
/**
 * Return all the documents, without any OFFSET / LIMIT window.
 *
 * @return array list of DocumentResult as built by queryDocs()
 */
public function all() {
    return $this->queryDocs();
}
126 |
180 |
127 public function get($id, bool $short=false) { |
181 public function get($id, bool $short=false) { |
128 |
182 |
129 if(strpos($id, config('corpusparole.corpus_id_scheme')) === 0) { |
183 if(strpos($id, config('corpusparole.corpus_id_scheme')) === 0) { |
157 |
211 |
158 $transactionStarted = $this->sparqlClient->startTransaction(); |
212 $transactionStarted = $this->sparqlClient->startTransaction(); |
159 |
213 |
160 try { |
214 try { |
161 foreach($doc->getDeltaList() as $delta) { |
215 foreach($doc->getDeltaList() as $delta) { |
|
216 $this->sparqlClient->deleteWhere($delta->getDeleteWhere(), $delta->getUri()); |
162 $this->sparqlClient->delete($delta->getDeletedGraph()); |
217 $this->sparqlClient->delete($delta->getDeletedGraph()); |
163 $this->sparqlClient->add($delta->getAddedGraph()); |
218 $this->sparqlClient->add($delta->getAddedGraph()); |
164 } |
219 } |
165 if($transactionStarted) { |
220 if($transactionStarted) { |
166 $transactionStarted = false; |
221 $transactionStarted = false; |
205 |
260 |
206 $total = $this->getCount(); |
261 $total = $this->getCount(); |
207 |
262 |
208 $offset = max(0,($page - 1) * $perPage); |
263 $offset = max(0,($page - 1) * $perPage); |
209 |
264 |
210 $results = $this->queryDocs(array_map(function($q) use ($offset, $perPage) { return $q . "\nOFFSET $offset LIMIT $perPage"; }, self::ALL_QUERIES)); |
265 $results = $this->queryDocs($offset, $perPage); |
211 |
266 |
212 return new LengthAwarePaginator($results, $total, $perPage, $page, [ |
267 return new LengthAwarePaginator($results, $total, $perPage, $page, [ |
213 'path' => Paginator::resolveCurrentPath(), |
268 'path' => Paginator::resolveCurrentPath(), |
214 'pageName' => $pageName, |
269 'pageName' => $pageName, |
215 ]); |
270 ]); |