diff -r 4a3899b6a7ed -r 766af1228b05 server/src/app/Repositories/RdfDocumentRepository.php --- a/server/src/app/Repositories/RdfDocumentRepository.php Sun Oct 16 22:23:31 2016 +0530 +++ b/server/src/app/Repositories/RdfDocumentRepository.php Sun Oct 16 23:19:57 2016 +0530 @@ -9,6 +9,7 @@ use CorpusParole\Libraries\CorpusParoleException; use CorpusParole\Libraries\Utils; use CorpusParole\Libraries\Sparql\SparqlClient; +use CorpusParole\Libraries\Filters\CorpusFilterManager; use CorpusParole\Services\LexvoResolverInterface; @@ -18,6 +19,8 @@ use Illuminate\Pagination\LengthAwarePaginator; use Illuminate\Pagination\Paginator; +use Es; + /** * Implement the DocumentRepository using EasyRdf * TODO: certainly split the transaction management (+add, +delete +transaction ) to an external class -> for this extend the sparql client. @@ -31,18 +34,19 @@ " ?title". " ?issued". " ?modified". - " ?lang". + " (group_concat(distinct ?language;separator=\", \") as ?lang) ". " (group_concat(distinct ?publisher;separator=\", \") as ?publishers) ". " WHERE {". - " GRAPH ?uri { ?doc a .". - " ?doc ?title.". - " OPTIONAL {?doc ?lang.} ". - " OPTIONAL {?doc ?issued.} ". - " OPTIONAL {?doc ?modified.} ". - " OPTIONAL {?doc ?publisher.} }". + " GRAPH ?uri { ?doc a .". + " ?doc ?title.". + " OPTIONAL {?doc ?language.} ". + " OPTIONAL {?doc ?issued.} ". + " OPTIONAL {?doc ?modified.} ". + " OPTIONAL {?doc ?publisher.}". + " }. ". + " %s". " } ". - " GROUP BY ?uri ?doc ?title ?issued ?modified ?lang ". - " ORDER BY ?uri"; + " GROUP BY ?uri ?doc ?title ?issued ?modified "; const ADDITIONAL_DOC_QUERIES = [ "SELECT". 
@@ -102,7 +106,14 @@ $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/title", $doc->title); } if(isset($doc->lang)) { - $newGraph->add($doc->doc, "http://purl.org/dc/elements/1.1/language", $doc->lang); + foreach(explode(", ", $doc->lang) as $langStr) { + $langStr = trim($langStr); + if(filter_var($langStr, FILTER_VALIDATE_URL)) { + $newGraph->addResource($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr); + } else { + $newGraph->addLiteral($doc->doc, "http://purl.org/dc/elements/1.1/language", $langStr); + } + } } if(isset($doc->issued)) { $newGraph->add($doc->doc, "http://purl.org/dc/terms/issued", $doc->issued); @@ -122,26 +133,105 @@ return $newGraph; } - private function queryDocs($offset=null, $limit=null) { + private function queryES($filters=null, $offset=null, $limit=null, $sort=null) { + + if(empty($sort)) { + $sort = ["_doc"]; + } elseif (is_string($sort)) { + $sort = [$sort]; + } + if(is_null($filters)) { + //$filters = ['language' => ["http://lexvo.org/id/iso639-3/oci", "http://lexvo.org/id/iso639-3/bre"]]; + $filters = []; + } + + $qFilterParts = []; + + if(array_key_exists('language', $filters) && !empty($filters['language'])) { + $languages = $filters['language']; + if(is_string($languages)) { + $languages = [ $languages, ]; + } + $qFilterParts[] = CorpusFilterManager::getLanguagesFilterPart($languages); + } + + $query = [ + 'index' => config('corpusparole.elasticsearch_index'), + 'body' => [ + "size" => empty($limit)?0:$limit, + "from" => empty($offset)?0:$offset, // normalised like "size": never send a null "from" + "sort" => $sort + ] + ]; + + if(count($qFilterParts)>0) { + $query['body']['query'] = ['constant_score' => [ + 'filter' => [ + 'bool' => [ + 'must' => $qFilterParts + ] + ] + ] ]; + } + + $esRes = Es::search($query); + + return ['total' => $esRes['hits']['total'], 'documents' => array_map(function($r) { + return $r['_id']; + }, $esRes['hits']['hits'])]; + + } + + /** + * Query docs. 
+ * If $filters is empty or null and $sort is '_graph', the documents list is fetched from the triple store; otherwise the documents are fetched from ElasticSearch + */ + private function queryDocs($filters=null, $offset=null, $limit=null, $sort=null) { $resDocs = []; + $limitsClauses = []; + $sortClauseStr = ""; $limitsClausesStr = ""; + $filterUris = ""; - if(!is_null($offset)) { - array_push($limitsClauses, "OFFSET $offset"); - } - if(!is_null($limit)) { - array_push($limitsClauses, "LIMIT $limit"); - } - if(!empty($limitsClauses)) { - $limitsClausesStr = "\n" . join(" ", $limitsClauses); + if(empty($filters) && $sort === "_graph") { + if(!is_null($offset)) { + array_push($limitsClauses, "OFFSET $offset"); + } + if(!is_null($limit)) { + array_push($limitsClauses, "LIMIT $limit"); + } + if(!empty($limitsClauses)) { + $limitsClausesStr = "\n" . join(" ", $limitsClauses); + } + $sortClauseStr = "\n ORDER BY ?uri"; + $total = $this->getCount(); + } else { + $esRes = $this->queryES($filters, $offset, $limit, ($sort === "_graph") ? null : $sort); // forward the caller's sort; "_graph" is a triple-store sentinel, not an ES field + // WARNING: we rely on PHP arrays preserving key insertion order (the ES result order is kept through $resDocs) + $total = intval($esRes['total']); + foreach($esRes['documents'] as $esDocId) { + $uri = config('corpusparole.corpus_doc_id_base_uri_prefix').$esDocId; + $resDocs[$uri] = null; + } + if(count($resDocs) > 0) { + $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; + } else { + return ['meta' => [ 'total'=> $total ], 'documents' => []]; + } + } - $docs = $this->sparqlClient->query(self::BASE_DOC_QUERY.$limitsClausesStr); + + $sparqlQuery = sprintf(self::BASE_DOC_QUERY.$sortClauseStr.$limitsClausesStr, $filterUris); + + $docs = $this->sparqlClient->query($sparqlQuery); + foreach($docs as $doc) { $graph = $this->getResGraph($doc); if(is_null($graph)) { + Log::debug("NULL GRAPH - odd"); continue; } $uri = $doc->uri->getUri(); @@ -149,10 +239,12 @@ } if(count($resDocs) == 0) { - return []; + return ['meta' => [ 'total'=> $total ], 'documents' => []]; } - $filterUris = "FILTER(?uri in 
(<".join(">, <" , array_keys($resDocs)).">)) "; + if(empty($filterUris)) { + $filterUris = "FILTER(?uri in (<".join(">, <" , array_keys($resDocs)).">)) "; + } foreach(self::ADDITIONAL_DOC_QUERIES as $query) { $docs = $this->sparqlClient->query(sprintf($query, $filterUris)); @@ -171,11 +263,13 @@ } } - return array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values($resDocs)); + $documentsResults = array_map(function($g) { return new DocumentResult($g->getUri(), $g); }, array_values(array_filter($resDocs))); // drop entries still null (ES hit for which no graph was loaded) before dereferencing + + return ['meta' => [ 'total'=> $total ], 'documents' => $documentsResults]; } public function all() { - return $this->queryDocs(); + return $this->queryDocs(null, null, null, "_graph")['documents']; } public function get($id, $short=false) { @@ -233,10 +327,19 @@ } } - public function getCount() { - $res = $this->sparqlClient->query("SELECT (COUNT (DISTINCT ?g) as ?count) WHERE { GRAPH ?g { ?s a } }"); - assert(!is_null($res) && $res->count()==1); - return $res[0]->count->getValue(); + public function getCount($filters=null) { + $count = 0; + if(empty($filters)) { + $res = $this->sparqlClient->query("SELECT (COUNT (DISTINCT ?g) as ?count) WHERE { GRAPH ?g { ?s a } }"); + assert(!is_null($res) && count($res)==1); + $count = intval($res[0]->count->getValue()); + } else { + $esRes = $this->queryES($filters, 0, 0); + $count = intval($esRes['total']); // queryES() returns ['total' => ..., 'documents' => ...], not the raw ES response + } + + return $count; + } //SELECT ?g WHERE { GRAPH ?g { ?s ?p ?o } } @@ -250,6 +353,19 @@ */ public function paginateAll($perPage = 15, $pageName = 'page', $page = null) { + return $this->paginate(null, $perPage, $pageName, $page); // honour the caller's explicit page instead of discarding it + } + + /** + * Paginate filtered documents as a paginator. 
+ * + * @param array $filters + * @param int $perPage + * @param string $pageName + * @return \Illuminate\Contracts\Pagination\LengthAwarePaginator + */ + public function paginate($filters = null, $perPage = 15, $pageName = 'page', $page = null, $sort=null) { + assert(is_numeric($perPage)); if(is_null($page)) { @@ -258,18 +374,18 @@ assert(is_null($page) || is_numeric($page)); - $total = $this->getCount(); - $offset = max(0,($page - 1) * $perPage); - $results = $this->queryDocs($offset, $perPage); + $results = $this->queryDocs($filters, $offset, $perPage, $sort); - return new LengthAwarePaginator($results, $total, $perPage, $page, [ + return new LengthAwarePaginator($results['documents'], $results['meta']['total'], $perPage, $page, [ 'path' => Paginator::resolveCurrentPath(), 'pageName' => $pageName, ]); + } + /** * Resolve lexvo id for all documents in the list * this allow to optimise the call of lexvo repository @@ -280,8 +396,10 @@ $languageIds = []; #get the list pf language needing resolving foreach ($docList as $doc) { - if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) { - $languageIds[$doc->getLanguageValue()] = true; + if(!empty($doc->getLanguagesValue()) && is_null($doc->getLanguagesResolved())) { + foreach($doc->getLanguagesValue() as $lang) { + $languageIds[$lang]=true; + } } } @@ -289,8 +407,12 @@ $langNames = $this->lexvoResolver->getNames(array_keys($languageIds)); foreach ($docList as $doc) { - if($doc->getLanguageValue() && is_null($doc->getLanguageResolved())) { - $doc->setLanguageResolved($langNames[$doc->getLanguageValue()]); + if(!empty($doc->getLanguagesValue()) && is_null($doc->getLanguagesResolved())) { + $langResolved = []; + foreach($doc->getLanguagesValue() as $lang) { + $langResolved[] = $langNames[$lang]; + } + $doc->setLanguageResolved($langResolved); } }