--- a/server/src/app/Repositories/RdfDocumentRepository.php Sun Oct 16 22:23:31 2016 +0530
+++ b/server/src/app/Repositories/RdfDocumentRepository.php Sun Oct 16 23:19:57 2016 +0530
@@ -9,6 +9,7 @@
use CorpusParole\Libraries\CorpusParoleException;
use CorpusParole\Libraries\Utils;
use CorpusParole\Libraries\Sparql\SparqlClient;
+use CorpusParole\Libraries\Filters\CorpusFilterManager;
use CorpusParole\Services\LexvoResolverInterface;
@@ -18,6 +19,8 @@
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Pagination\Paginator;
+use Es;
+
/**
* Implement the DocumentRepository using EasyRdf
* TODO: certainly split the transaction management (+add, +delete +transaction ) to an external class -> for this extend the sparql client.
@@ -31,18 +34,19 @@
" ?title".
" ?issued".
" ?modified".
- " ?lang".
+ " (group_concat(distinct ?language;separator=\", \") as ?lang) ".
" (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ".
" WHERE {".
- " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
- " ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
- " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?lang.} ".
- " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
- " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
- " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.} }".
+ " GRAPH ?uri { ?doc a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.".
+ " ?doc <http://purl.org/dc/elements/1.1/title> ?title.".
+ " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/language> ?language.} ".
+ " OPTIONAL {?doc <http://purl.org/dc/terms/issued> ?issued.} ".
+ " OPTIONAL {?doc <http://purl.org/dc/terms/modified> ?modified.} ".
+ " OPTIONAL {?doc <http://purl.org/dc/elements/1.1/publisher> ?publisher.}".
+ " }. ".
+ " %s".
" } ".
- " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ".
- " ORDER BY ?uri";
+ " GROUP BY ?uri ?doc ?title ?issued ?modified ";
const ADDITIONAL_DOC_QUERIES = [
"SELECT".
@@ -102,7 +106,14 @@
$newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title);
}
if(isset($doc->lang)) {
- $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/language", $doc->lang);
+ foreach(explode(", ", $doc->lang) as $langStr) {
+ $langStr = trim($langStr);
+ if(filter_var($langStr, FILTER_VALIDATE_URL)) {
+ $newGraph->addResource($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr);
+ } else {
+ $newGraph->addLiteral($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr);
+ }
+ }
}
if(isset($doc->issued)) {
$newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued);
@@ -122,26 +133,105 @@
return $newGraph;
}
- private function queryDocs($offset=null, $limit=null) {
+    /**
+     * Search ElasticSearch for the ids of the documents matching the given filters.
+     *
+     * @param array|null $filters filters to apply. Only the 'language' key is read here; its value is
+     *                            either a single string or a list of strings,
+     *                            e.g. ['language' => ["http://lexvo.org/id/iso639-3/oci", "http://lexvo.org/id/iso639-3/bre"]]
+     * @param int|null $offset index of the first hit to return; null is sent as 0
+     * @param int|null $limit maximum number of hits to return; an empty value requests no hit at all (size 0)
+     * @param string|array|null $sort sort definition; a string is wrapped into a list, an empty value means ["_doc"]
+     * @return array ['total' => total reported by ElasticSearch, 'documents' => list of the '_id' of each returned hit]
+     */
+    private function queryES($filters=null, $offset=null, $limit=null, $sort=null) {
+
+        if(empty($sort)) {
+            $sort = ["_doc"];
+        } elseif (is_string($sort)) {
+            $sort = [$sort];
+        }
+        if(is_null($filters)) {
+            $filters = [];
+        }
+
+        $qFilterParts = [];
+
+        if(!empty($filters['language'])) {
+            $languages = $filters['language'];
+            if(is_string($languages)) {
+                $languages = [ $languages, ];
+            }
+            $qFilterParts[] = CorpusFilterManager::getLanguagesFilterPart($languages);
+        }
+
+        $query = [
+            'index' => config('corpusparole.elasticsearch_index'),
+            'body' => [
+                "size" => empty($limit)?0:$limit,
+                // never send a null "from": callers may omit the offset
+                "from" => is_null($offset)?0:$offset,
+                "sort" => $sort
+            ]
+        ];
+
+        // without any filter part no 'query' key is added to the request body
+        if(count($qFilterParts)>0) {
+            $query['body']['query'] = ['constant_score' => [
+                'filter' => [
+                    'bool' => [
+                        'must' => $qFilterParts
+                    ]
+                ]
+            ] ];
+        }
+
+        $esRes = Es::search($query);
+
+        return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) {
+            return $r['_id'];
+        }, $esRes['hits']['hits'])];
+
+    }
+
+ /**
+ * Query docs.
+ * If $filters is empty or null and $sort is '_graph', the document list is fetched from the triple store; otherwise it is fetched from ElasticSearch.
+ */
+ private function queryDocs($filters=null, $offset=null, $limit=null, $sort=null) {
$resDocs = [];
+
$limitsClauses = [];
+ $sortClauseStr = "";
$limitsClausesStr = "";
+ $filterUris = "";
- if(!is_null($offset)) {
- array_push($limitsClauses, "OFFSET $offset");
- }
- if(!is_null($limit)) {
- array_push($limitsClauses, "LIMIT $limit");
- }
- if(!empty($limitsClauses)) {
- $limitsClausesStr = "\n" . join(" ", $limitsClauses);
+ if(empty($filters) && $sort === "_graph") {
+ if(!is_null($offset)) {
+ array_push($limitsClauses, "OFFSET $offset");
+ }
+ if(!is_null($limit)) {
+ array_push($limitsClauses, "LIMIT $limit");
+ }
+ if(!empty($limitsClauses)) {
+ $limitsClausesStr = "\n" . join(" ", $limitsClauses);
+ }
+ $sortClauseStr = "\n ORDER BY ?uri";
+ $total = $this->getCount();
+ } else {
+ $esRes = $this->queryES($filters, $offset, $limit);
+ // WARNING: we count on the fact that php keep keys order
+ $total = intval($esRes['total']);
+ foreach($esRes['documents'] as $esDocId) {
+ $uri = config('corpusparole.corpus_doc_id_base_uri_prefix').$esDocId;
+ $resDocs[$uri] = null;
+ }
+ if(count($resDocs) > 0) {
+ $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
+ } else {
+ return ['meta' => [ 'total'=> $total ], 'documents' => []];
+ }
+
}
- $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr);
+
+ $sparqlQuery = sprintf(self::BASE_DOC_QUERY.$sortClauseStr.$limitsClausesStr, $filterUris);
+
+ $docs = $this->sparqlClient->query($sparqlQuery);
+
foreach($docs as $doc) {
$graph = $this->getResGraph($doc);
if(is_null($graph)) {
+ Log::debug("NULL GRAPH - odd");
continue;
}
$uri = $doc->uri->getUri();
@@ -149,10 +239,12 @@
}
if(count($resDocs) == 0) {
- return [];
+ return ['meta' => [ 'total'=> $total ], 'documents' => []];
}
- $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
+ if(empty($filterUris)) {
+ $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) ";
+ }
foreach(self::ADDITIONAL_DOC_QUERIES as $query) {
$docs = $this->sparqlClient->query(sprintf($query, $filterUris));
@@ -171,11 +263,13 @@
}
}
- return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
+ $documentsResults = array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs));
+
+ return ['meta' => [ 'total'=> $total ], 'documents' => $documentsResults];
}
public function all() {
- return $this->queryDocs();
+        // No filter, offset or limit; the "_graph" sort is what makes queryDocs take its triple store branch.
+        $result = $this->queryDocs(null, null, null, "_graph");
+        return $result['documents'];
}
public function get($id, $short=false) {
@@ -233,10 +327,19 @@
}
}
- public function getCount() {
- $res = $this->sparqlClient->query("SELECT (COUNT (DISTINCT ?g) as ?count) WHERE { GRAPH ?g { ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> } }");
- assert(!is_null($res) && $res->count()==1);
- return $res[0]->count->getValue();
+    /**
+     * Count the documents.
+     * Without filters the count comes from a SPARQL query on the triple store,
+     * otherwise it is the total reported by ElasticSearch for these filters.
+     *
+     * @param array|null $filters filters passed as-is to queryES; empty or null means "all documents"
+     * @return int
+     */
+    public function getCount($filters=null) {
+        $count = 0;
+        if(empty($filters)) {
+            $res = $this->sparqlClient->query("SELECT (COUNT (DISTINCT ?g) as ?count) WHERE { GRAPH ?g { ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> } }");
+            assert(!is_null($res) && count($res)==1);
+            $count = intval($res[0]->count->getValue());
+        } else {
+            // offset 0 and limit 0: no hit is fetched, only the total is needed.
+            $esRes = $this->queryES($filters, 0, 0);
+            // queryES returns ['total' => ..., 'documents' => ...], not the raw ElasticSearch response.
+            $count = intval($esRes['total']);
+        }
+
+        return $count;
+
}
//SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } }
@@ -250,6 +353,19 @@
*/
public function paginateAll($perPage = 15, $pageName = 'page', $page = null)
{
+        // Forward the caller's $page: passing a literal null here would discard an explicitly requested page.
+        return $this->paginate(null, $perPage, $pageName, $page);
+    }
+
+ /**
+ * Return the filtered documents as a paginator.
+ *
+ * @param array $filters
+ * @param int $perPage
+ * @param string $pageName
+ * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator
+ */
+ public function paginate($filters = null, $perPage = 15, $pageName = 'page', $page = null, $sort=null) {
+
assert(is_numeric($perPage));
if(is_null($page)) {
@@ -258,18 +374,18 @@
assert(is_null($page) || is_numeric($page));
- $total = $this->getCount();
-
$offset = max(0,($page - 1) * $perPage);
- $results = $this->queryDocs($offset, $perPage);
+ $results = $this->queryDocs($filters, $offset, $perPage, $sort);
- return new LengthAwarePaginator($results, $total, $perPage, $page, [
+ return new LengthAwarePaginator($results['documents'], $results['meta']['total'], $perPage, $page, [
'path' => Paginator::resolveCurrentPath(),
'pageName' => $pageName,
]);
+
}
+
/**
* Resolve lexvo id for all documents in the list
* this allow to optimise the call of lexvo repository
@@ -280,8 +396,10 @@
$languageIds = [];
#get the list pf language needing resolving
foreach ($docList as $doc) {
- if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) {
- $languageIds[$doc->getLanguageValue()] = true;
+ if(!empty($doc->getLanguagesValue()) && is_null($doc->getLanguagesResolved())) {
+ foreach($doc->getLanguagesValue() as $lang) {
+ $languageIds[$lang]=true;
+ }
}
}
@@ -289,8 +407,12 @@
$langNames = $this->lexvoResolver->getNames(array_keys($languageIds));
foreach ($docList as $doc) {
- if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) {
- $doc->setLanguageResolved($langNames[$doc->getLanguageValue()]);
+ if(!empty($doc->getLanguagesValue()) && is_null($doc->getLanguagesResolved())) {
+ $langResolved = [];
+ foreach($doc->getLanguagesValue() as $lang) {
+ $langResolved[] = $langNames[$lang];
+ }
+ $doc->setLanguageResolved($langResolved);
}
}