# HG changeset patch # User ymh # Date 1475745272 -7200 # Node ID 084aae09edf4c0cf6de0184135c3547f006cca10 # Parent aefaad270b9bd6675a42c677a2543ab832575d0c correction on importRDF documents + evolution theme controller diff -r aefaad270b9b -r 084aae09edf4 build/build_rpm.sh --- a/build/build_rpm.sh Wed Oct 05 02:31:25 2016 +0200 +++ b/build/build_rpm.sh Thu Oct 06 11:14:32 2016 +0200 @@ -1,6 +1,6 @@ #!/usr/bin/env bash -pushd /vagrant/root/var/www/corpusdelaparole/corpus/ +pushd /vagrant/root/var/www/corpusdelaparole/corpus-back/ echo "---> Launching composer" php composer.phar install --ignore-platform-reqs -o echo "---> Composer done" diff -r aefaad270b9b -r 084aae09edf4 dev/README.md --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/dev/README.md Thu Oct 06 11:14:32 2016 +0200 @@ -0,0 +1,31 @@ +# REMIE Provisioning + +## Bootstrapping + +* Copier ubuntu-bootstrap sur le serveur + +* exécuter le script (sudo): `sudo ./ubuntu-bootstrap.sh`. Ce script installe l'agent puppet et ses dépendances. + +* ajouter l'utilisateur courant au groupe `puppet` + +## Installation +* On prépare sur le serveur un répertoire où les fichiers d'installation seront copiés. Par exemple : +```bash +sudo mkdir -p /var/lib/puppet/provision && sudo chown puppet:puppet /var/lib/puppet/provision && sudo chmod g+sw /var/lib/puppet/provision +``` + +* Copier les fichiers sur le serveur : `rsync --exclude='.git' --exclude='.hg*' -aviuPz provisioning/ :/var/lib/puppet/provision/` + + +## configuration +* copier le template de fichier de configuration dans `/etc/puppet/hiera/custom.yaml`. +```bash +mkdir -p /etc/puppet/hiera && sudo cp /var/lib/puppet/provision/custom.yaml.tmpl /etc/puppet/hiera/custom.yaml +``` + +* compléter le fichier de configuration. Le fichier template contient le minimum de paramètres nécessaires pour avoir un système fonctionnel. 
+* dans le fichier `/var/lib/puppet/provision/hiera.yaml`, mettre la ligne ` :datadir: "/etc/puppet/hiera"` + + +## execution +* dans le répertoire `/var/lib/puppet/provision` lancer la commande `sudo puppet apply --debug default.pp --modulepath=modules --hiera_config=hiera.yaml` diff -r aefaad270b9b -r 084aae09edf4 dev/provisioning/modules/sysconfig/templates/handle/handle_data.sql.sh.erb --- a/dev/provisioning/modules/sysconfig/templates/handle/handle_data.sql.sh.erb Wed Oct 05 02:31:25 2016 +0200 +++ b/dev/provisioning/modules/sysconfig/templates/handle/handle_data.sql.sh.erb Thu Oct 06 11:14:32 2016 +0200 @@ -35,7 +35,7 @@ INSERT INTO \`handles\` VALUES ('<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>',300,'HS_PUBKEY',0x$handle_admin_pubkey,0,86400,<%= scope.function_time([]) %>,'',1,1,1,0); -INSERT INTO \`handles\` VALUES ('<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>',200,'HS_VLIST','300:0.NA/<%= @hdl_test_prefix %>; 300:<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>; 300:<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>_DSA',0,86400,<%= scope.function_time([]) %>,'',1,1,1,0); +INSERT INTO \`handles\` VALUES ('<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>_DSA',200,'HS_VLIST','300:0.NA/<%= @hdl_test_prefix %>; 300:<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>; 300:<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>_DSA',0,86400,<%= scope.function_time([]) %>,'',1,1,1,0); INSERT INTO \`handles\` VALUES ('<%= @hdl_test_prefix %>/<%= @hdl_prefix_admin %>_DSA',300,'HS_PUBKEY',0x$handle_admin_pubkey_dsa,0,86400,<%= scope.function_time([]) %>,'',1,1,1,0); /*!40000 ALTER TABLE \`handles\` ENABLE KEYS */; diff -r aefaad270b9b -r 084aae09edf4 server/src/app/Console/Commands/ImportCocoonRDF.php --- a/server/src/app/Console/Commands/ImportCocoonRDF.php Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/app/Console/Commands/ImportCocoonRDF.php Thu Oct 06 11:14:32 2016 +0200 @@ -100,8 +100,8 @@ $documentCounts = ['all' => 0, 'unknown' => 0, 'error' => 0]; foreach 
($recs as $item) { - - $identifier = (string) $item->xpath('/record/header/identifier')[0]; + $item->registerXPathNamespace('oai', "http://www.openarchives.org/OAI/2.0/"); + $identifier = (string) $item->xpath('/oai:record/oai:header/oai:identifier')[0]; $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base'))); $message = "$identifier : $docRdfUrl"; if($recs->getNumRetrieved() <= $skip) { diff -r aefaad270b9b -r 084aae09edf4 server/src/app/Console/Commands/IndexDocuments.php --- a/server/src/app/Console/Commands/IndexDocuments.php Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 06 11:14:32 2016 +0200 @@ -4,6 +4,7 @@ use Illuminate\Console\Command; use EasyRdf\Resource; +use EasyRdf\Literal; use GuzzleHttp\Client; use CorpusParole\Libraries\Utils; @@ -11,6 +12,7 @@ use CorpusParole\Libraries\CocoonUtils; use CorpusParole\Models\GeonamesHierarchy; use CorpusParole\Services\BnfResolverInterface; +use CorpusParole\Services\LexvoResolverInterface; use Es; class IndexDocuments extends Command @@ -24,7 +26,8 @@ protected $signature = 'corpus-parole:indexDocuments {--limit=0 : index only the first n documents, 0 (default) means index everything } {--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing } - {--step-size=100 : number of documents to retrieve from repository at a time before indexing}'; + {--step-size=100 : number of documents to retrieve from repository at a time before indexing} + {--reset-geo-cache : reset geo cache before indexing}'; /** * The console command description. 
@@ -38,10 +41,15 @@ * * @return void */ - public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver) + public function __construct( + DocumentRepository $documentRepository, + Client $httpClient, + BnfResolverInterface $bnfResolver, + LexvoResolverInterface $lexvoResolver) { $this->documentRepository = $documentRepository; $this->bnfResolver = $bnfResolver; + $this->lexvoResolver = $lexvoResolver; $this->httpClient = $httpClient; parent::__construct(); } @@ -53,8 +61,12 @@ * * @return int (1 if sucess, 0 if error) */ - private function resetIndex() + private function resetIndex($resetGeoCache) { + if($resetGeoCache) { + // delete all rows in GeonamesHierarchy + GeonamesHierarchy::getQuery()->delete(); + } $indexParams = [ 'index' => env('ELASTICSEARCH_INDEX') ]; @@ -126,7 +138,7 @@ ] )->getBody(); $hjson = json_decode($apiBody); - $hcache = new GeonamesHierarchy; + $hcache = new GeonamesHierarchy(); $hcache->geonamesid = $geonamesid; $hcache->hierarchy = $hjson; $hcache->save(); @@ -173,20 +185,58 @@ private function getSubjects($doc) { $sres = array_reduce($doc->getSubjects(), function($res, $s) { - $m = []; - if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) { + $mBnf = []; + $mLexvo = []; + + if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) { array_push($res, [ - 'uri' => $m[0], - 'code' => $m[1] + 'uri' => $mBnf[0], + 'code' => $mBnf[1], + 'type' => 'bnf' + ]); + } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), 
$mLexvo) === 1) { + array_push($res, [ + 'uri' => $mLexvo[0], + 'code' => $mLexvo[1], + 'type' => 'lxv' ]); } + return $res; }, []); - $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres))); + $labelsBnf = $this->bnfResolver->getLabels( + array_unique(array_reduce( + $sres, + function($r, $so) { + if($so['type'] === 'bnf') { + array_push($r, $so['uri']); + } + return $r; + },[] + )) + ); + $labelsLexvo = $this->lexvoResolver->getLabels( + array_unique(array_reduce( + $sres, + function($r, $so) { + if($so['type'] === 'lxv') { + array_push($r, $so['uri']); + } + return $r; + },[] + )) + ); - return array_map(function($so) use ($labels) { return [ 'label' => $labels[$so['uri']], 'code' => $so['code'], 'label_code' => $labels[$so['uri']]."|".$so['code'] ]; }, $sres); - + return array_map(function($so) use ($labelsBnf, $labelsLexvo) { + $label = $so['uri']; + if($so['type'] === 'bnf') { + $label = $labelsBnf[$label]; + } elseif ($so['type'] === 'lxv') { + $label = $labelsLexvo[$label]; + } + return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['type']."|".$so['code'] ]; }, $sres + ); } /** @@ -261,8 +311,9 @@ $stepSize = $this->option('step-size'); $this->comment(' - Indexing with step size of '.$stepSize); + $resetGeoCache = $this->option('reset-geo-cache', false); $this->info('Resetting index...'); - $success = $this->resetIndex(); + $success = $this->resetIndex($resetGeoCache); if($success==1){ $this->comment('Index reset!'); } diff -r aefaad270b9b -r 084aae09edf4 server/src/app/Http/Controllers/Api/ThemeController.php --- a/server/src/app/Http/Controllers/Api/ThemeController.php Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 06 11:14:32 2016 +0200 @@ -35,10 +35,23 @@ */ public function index(Request $request) { + $filter = $request->input('filter', 'bnf'); $index = $request->input('index', 0); - $limit = $request->input('limit', 
0); + $limit = $request->input('limit', config('corpusparole.theme_default_limit')); $sort = $request->input('sort', 'count'); + if($filter === 'bnf') { + $include = ".*\|bnf\|.*"; + } + elseif($filter === 'lexvo') { + $include = ".*\|lxv\|.*"; + } + elseif($filter === 'all') { + $include = false; + } + else { + abort(400,"Value for filter parameter must be in 'bnf', 'lexvo', 'all'"); + } if($sort == "count" || $sort == "descending") { $order_field = "_count"; @@ -78,6 +91,10 @@ ] ]; + if($include) { + $query['body']['aggs']['subjects']['aggs']['subjects']['terms']['include'] = $include; + } + $esRes = Es::search($query); $themes = []; @@ -87,7 +104,7 @@ foreach($bucketList as $bucket) { $parts = explode("|", $bucket['key']); $label = $parts[0]; - $url = config('corpusparole.bnf_ark_base_url').$parts[1]; + $url = ['bnf' => config('corpusparole.bnf_ark_base_url'), 'lxv' => config('corpusparole.lexvo_base_url')][$parts[1]].$parts[2]; $themes[$url] = [ "label" => $label, "count" => $bucket['doc_count'] diff -r aefaad270b9b -r 084aae09edf4 server/src/config/corpusparole.php --- a/server/src/config/corpusparole.php Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/config/corpusparole.php Thu Oct 06 11:14:32 2016 +0200 @@ -112,7 +112,10 @@ 'viaf_cache_expiration' => 60*24*30, 'viaf_max_ids' => 500, + 'theme_default_limit' => 40, + 'lexvo_base_url' => 'http://lexvo.org/id/iso639-3/', + 'lexvo_url_regexp' => '/http[s]?\:\/\/lexvo\.org\/(?:id\/)?iso639\-3\/([[:alpha:]]{3})\/?/', 'lexvo_cache_expiration' => 60*24*30, 'lexvo_max_ids' => 50, 'lexvo_sesame_query_url' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_LEXVO_REPOSITORY'), diff -r aefaad270b9b -r 084aae09edf4 server/src/gulpfile.js --- a/server/src/gulpfile.js Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/gulpfile.js Thu Oct 06 11:14:32 2016 +0200 @@ -45,7 +45,7 @@ if(err) { return; } - gulp.src(['**/*','!vendor', '!vendor/**', '!node_modules', '!public/corpus-app', '!public/corpus-app/**','!node_modules/**', 
'!.env', '!.git*']) + gulp.src(['**/*','!vendor', '!vendor/**', '!node_modules', /*'!public/corpus-app', '!public/corpus-app/**',*/ '!node_modules/**', '!.env', '!.git*']) .pipe(gulp.dest('../../build/root/var/www/corpusdelaparole/corpus-back/')) }); }); diff -r aefaad270b9b -r 084aae09edf4 server/src/tests/Controllers/ThemeControllerTest.php --- a/server/src/tests/Controllers/ThemeControllerTest.php Wed Oct 05 02:31:25 2016 +0200 +++ b/server/src/tests/Controllers/ThemeControllerTest.php Thu Oct 06 11:14:32 2016 +0200 @@ -10,16 +10,10 @@ */ class ThemeControllerTest extends TestCase { - private $sparqlClient; - public function setUp() { parent::setup(); - // create a mock of the post repository interface and inject it into the - // IoC container - $this->sparqlClient = m::mock('CorpusParole\Libraries\Sparql\SparqlClient'); - $this->app->instance('CorpusParole\Libraries\Sparql\SparqlClient', $this->sparqlClient); } public function tearDown() { @@ -27,130 +21,602 @@ parent::tearDown(); } - public function testIndexQuery() { + public function testIndex() { + + $query = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => config('corpusparole.theme_default_limit'), + "order" => [ '_count' => 'desc' ], + "include" => ".*\|bnf\|.*" + ] + ] + ] + ] + ] + ] + ]; - $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . - FILTER (isIRI(?o) && regex(str(?o), '^".config('corpusparole.bnf_ark_base_url')."')) . 
- } - GROUP BY ?o - ORDER BY DESC(?count)"); + Es::shouldReceive('search') + ->once() + ->with($query) + ->andReturn([ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + "hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ [ + "key" => "professions|bnf|ark:/12148/cb13318415c", + "doc_count" => 1412 + ], [ + "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s", + "doc_count" => 1092 + ], [ + "key" => "famille|bnf|ark:/12148/cb119339867", + "doc_count" => 1050 + ], [ + "key" => "oiseaux|bnf|ark:/12148/cb11932889r", + "doc_count" => 1003 + ], [ + "key" => "météorologie|bnf|ark:/12148/cb11932496x", + "doc_count" => 1001 + ], [ + "key" => "plantes|bnf|ark:/12148/cb11933145f", + "doc_count" => 995 + ], [ + "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q", + "doc_count" => 989 + ], [ + "key" => "arbres|bnf|ark:/12148/cb11934786x", + "doc_count" => 989 + ] ] + ] + ] + ] + ]); + $this->get('/api/v1/stats/themes/')->assertTrue($this->response->isOk(), $this->response->content()); + $this->seeJsonEquals(["themes" => [ + "http://ark.bnf.fr/ark:/12148/cb13318415c" => ['label' => 'professions', 'count' => 1412 ], + "http://ark.bnf.fr/ark:/12148/cb16604691s" => ['label' => 'travail non rémunéré', 'count' => 1092 ], + "http://ark.bnf.fr/ark:/12148/cb119339867" => ['label' => 'famille', 'count' => 1050 ], + "http://ark.bnf.fr/ark:/12148/cb11932889r" => ['label' => 'oiseaux', 'count' => 1003 ], + "http://ark.bnf.fr/ark:/12148/cb11932496x" => ['label' => 'météorologie', 'count' => 1001 ], + "http://ark.bnf.fr/ark:/12148/cb11933145f" => ['label' => 'plantes', 'count' => 995 ], + "http://ark.bnf.fr/ark:/12148/cb11930908q" => ['label' => 'animaux sauvages', 'count' => 989 ], + "http://ark.bnf.fr/ark:/12148/cb11934786x" => 
['label' => 'arbres', 'count' => 989 ] + ]]); - $this->sparqlClient - ->shouldReceive('query') - ->with($query) - ->once() - ->andReturn(new \ArrayIterator([])); - $this->get('/api/v1/stats/themes/'); } - public function testIndexQueryBnf() { + public function testIndexAll() { + + $query = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => config('corpusparole.theme_default_limit'), + "order" => [ '_count' => 'desc' ] + ] + ] + ] + ] + ] + ] + ]; - $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . - FILTER (isIRI(?o) && regex(str(?o), '^".config('corpusparole.bnf_ark_base_url')."')) . - } - GROUP BY ?o - ORDER BY DESC(?count)"); + Es::shouldReceive('search') + ->once() + ->with($query) + ->andReturn([ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + "hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ [ + "key" => "professions|bnf|ark:/12148/cb13318415c", + "doc_count" => 1412 + ], [ + "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s", + "doc_count" => 1092 + ], [ + "key" => "famille|bnf|ark:/12148/cb119339867", + "doc_count" => 1050 + ], [ + "key" => "oiseaux|bnf|ark:/12148/cb11932889r", + "doc_count" => 1003 + ], [ + "key" => "Français|lxv|fra", + "doc_count" => 1002 + ], [ + "key" => "météorologie|bnf|ark:/12148/cb11932496x", + "doc_count" => 1001 + ], [ + "key" => "plantes|bnf|ark:/12148/cb11933145f", + "doc_count" => 995 + ], [ + "key" => "Breton|lxv|bzh", + "doc_count" => 992 + ], [ + "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q", + "doc_count" => 
989 + ], [ + "key" => "arbres|bnf|ark:/12148/cb11934786x", + "doc_count" => 989 + ] ] + ] + ] + ] + ]); + $this->get('/api/v1/stats/themes/?filter=all')->assertTrue($this->response->isOk(), $this->response->content()); + $this->seeJsonEquals(["themes" => [ + "http://ark.bnf.fr/ark:/12148/cb13318415c" => ['label' => 'professions', 'count' => 1412 ], + "http://ark.bnf.fr/ark:/12148/cb16604691s" => ['label' => 'travail non rémunéré', 'count' => 1092 ], + "http://ark.bnf.fr/ark:/12148/cb119339867" => ['label' => 'famille', 'count' => 1050 ], + "http://ark.bnf.fr/ark:/12148/cb11932889r" => ['label' => 'oiseaux', 'count' => 1003 ], + "http://lexvo.org/id/iso639-3/fra" => ['label' => 'Français', 'count' => 1002 ], + "http://ark.bnf.fr/ark:/12148/cb11932496x" => ['label' => 'météorologie', 'count' => 1001 ], + "http://ark.bnf.fr/ark:/12148/cb11933145f" => ['label' => 'plantes', 'count' => 995 ], + "http://lexvo.org/id/iso639-3/bzh" => ['label' => 'Breton', 'count' => 992 ], + "http://ark.bnf.fr/ark:/12148/cb11930908q" => ['label' => 'animaux sauvages', 'count' => 989 ], + "http://ark.bnf.fr/ark:/12148/cb11934786x" => ['label' => 'arbres', 'count' => 989 ] + ]]); - $this->sparqlClient - ->shouldReceive('query') - ->with($query) - ->once() - ->andReturn(new \ArrayIterator([])); - $this->get('/api/v1/stats/themes/?filter=bnf'); } + public function testIndexLexvo() { - public function testIndexQueryAll() { + $query = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => config('corpusparole.theme_default_limit'), + "order" => [ '_count' => 'desc' ], + "include" => ".*\|lxv\|.*" + ] + ] + ] + ] + ] + ] + ]; + + Es::shouldReceive('search') + ->once() + ->with($query) + ->andReturn([ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + 
"hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ [ + "key" => "Français|lxv|fra", + "doc_count" => 1002 + ], [ + "key" => "Breton|lxv|bzh", + "doc_count" => 992 + ] ] + ] + ] + ] + ]); + $this->get('/api/v1/stats/themes/?filter=lexvo')->assertTrue($this->response->isOk(), $this->response->content()); + $this->seeJsonEquals(["themes" => [ + "http://lexvo.org/id/iso639-3/fra" => ['label' => 'Français', 'count' => 1002 ], + "http://lexvo.org/id/iso639-3/bzh" => ['label' => 'Breton', 'count' => 992 ] + ]]); + + } - $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . - } - GROUP BY ?o - ORDER BY DESC(?count)"); + private function getOrderQuery($order) { + return [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => config('corpusparole.theme_default_limit'), + "order" => $order, + "include" => ".*\|bnf\|.*" + ] + ] + ] + ] + ] + ] + ]; + + } - $this->sparqlClient - ->shouldReceive('query') - ->with($query) + private function getRespSort() { + return [ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + "hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ ] + ] + ] + ] + ]; + } + + public function testIndexSort() { + + Es::shouldReceive('search') ->once() - ->andReturn(new \ArrayIterator([])); - $this->get('/api/v1/stats/themes/?filter=all'); + ->with($this->getOrderQuery(['_count' => 
'desc'])) + ->andReturn($this->getRespSort()); + $this->get('/api/v1/stats/themes/')->assertTrue($this->response->isOk(), $this->response->content()); + + } + + public function testIndexSortReverseCount() { + Es::shouldReceive('search') + ->once() + ->with($this->getOrderQuery(['_count' => 'asc'])) + ->andReturn($this->getRespSort()); + $this->get('/api/v1/stats/themes/?sort=-count')->assertTrue($this->response->isOk(), $this->response->content()); + } + + public function testIndexSortLabel() { + Es::shouldReceive('search') + ->once() + ->with($this->getOrderQuery(['_term' => 'asc'])) + ->andReturn($this->getRespSort()); + $this->get('/api/v1/stats/themes/?sort=label')->assertTrue($this->response->isOk(), $this->response->content()); + } + + public function testIndexSortAlphabetical() { + Es::shouldReceive('search') + ->once() + ->with($this->getOrderQuery(['_term' => 'asc'])) + ->andReturn($this->getRespSort()); + $this->get('/api/v1/stats/themes/?sort=alphabetical')->assertTrue($this->response->isOk(), $this->response->content()); + } + + public function testIndexSortLabelReverse() { + Es::shouldReceive('search') + ->once() + ->with($this->getOrderQuery(['_term' => 'desc'])) + ->andReturn($this->getRespSort()); + $this->get('/api/v1/stats/themes/?sort=-label')->assertTrue($this->response->isOk(), $this->response->content()); } - public function testIndexQueryNone() { + public function testIndexLimit() { - $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . 
- } - GROUP BY ?o - ORDER BY DESC(?count)"); - - $this->sparqlClient - ->shouldReceive('query') - ->with($query) - ->once() - ->andReturn(new \ArrayIterator([])); - $this->get('/api/v1/stats/themes/?filter=none'); - } + $query = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => 3, + "order" => [ '_count' => 'desc' ], + "include" => ".*\|bnf\|.*" + ] + ] + ] + ] + ] + ] + ]; - - public function testIndexQueryEmpty() { + // This is not normally what should be received, but we test that the limit is respected + Es::shouldReceive('search') + ->once() + ->with($query) + ->andReturn([ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + "hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ [ + "key" => "professions|bnf|ark:/12148/cb13318415c", + "doc_count" => 1412 + ], [ + "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s", + "doc_count" => 1092 + ], [ + "key" => "famille|bnf|ark:/12148/cb119339867", + "doc_count" => 1050 + ], [ + "key" => "oiseaux|bnf|ark:/12148/cb11932889r", + "doc_count" => 1003 + ], [ + "key" => "météorologie|bnf|ark:/12148/cb11932496x", + "doc_count" => 1001 + ], [ + "key" => "plantes|bnf|ark:/12148/cb11933145f", + "doc_count" => 995 + ], [ + "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q", + "doc_count" => 989 + ], [ + "key" => "arbres|bnf|ark:/12148/cb11934786x", + "doc_count" => 989 + ] ] + ] + ] + ] + ]); + $this->get('/api/v1/stats/themes/?limit=3')->assertTrue($this->response->isOk(), $this->response->content()); + $this->seeJsonEquals(["themes" => [ + "http://ark.bnf.fr/ark:/12148/cb13318415c" 
=> ['label' => 'professions', 'count' => 1412 ], + "http://ark.bnf.fr/ark:/12148/cb16604691s" => ['label' => 'travail non rémunéré', 'count' => 1092 ], + "http://ark.bnf.fr/ark:/12148/cb119339867" => ['label' => 'famille', 'count' => 1050 ] + ]]); - $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . - } - GROUP BY ?o - ORDER BY DESC(?count)"); - - $this->sparqlClient - ->shouldReceive('query') - ->with($query) - ->once() - ->andReturn(new \ArrayIterator([])); - $this->get('/api/v1/stats/themes/?filter='); } - public function testIndexQueryUri() { + public function testIndexIndex() { + + $query = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => 6, + "order" => [ '_count' => 'desc' ], + "include" => ".*\|bnf\|.*" + ] + ] + ] + ] + ] + ] + ]; - $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . - FILTER isIRI(?o) . 
- } - GROUP BY ?o - ORDER BY DESC(?count)"); + // This is not normally what should be received, but we test that the limit is respected + Es::shouldReceive('search') + ->once() + ->with($query) + ->andReturn([ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + "hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ [ + "key" => "professions|bnf|ark:/12148/cb13318415c", + "doc_count" => 1412 + ], [ + "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s", + "doc_count" => 1092 + ], [ + "key" => "famille|bnf|ark:/12148/cb119339867", + "doc_count" => 1050 + ], [ + "key" => "oiseaux|bnf|ark:/12148/cb11932889r", + "doc_count" => 1003 + ], [ + "key" => "météorologie|bnf|ark:/12148/cb11932496x", + "doc_count" => 1001 + ], [ + "key" => "plantes|bnf|ark:/12148/cb11933145f", + "doc_count" => 995 + ], [ + "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q", + "doc_count" => 989 + ], [ + "key" => "arbres|bnf|ark:/12148/cb11934786x", + "doc_count" => 989 + ] ] + ] + ] + ] + ]); + $this->get('/api/v1/stats/themes/?limit=3&index=1')->assertTrue($this->response->isOk(), $this->response->content()); + $this->seeJsonEquals(["themes" => [ + "http://ark.bnf.fr/ark:/12148/cb11932889r" => ['label' => 'oiseaux', 'count' => 1003 ], + "http://ark.bnf.fr/ark:/12148/cb11932496x" => ['label' => 'météorologie', 'count' => 1001 ], + "http://ark.bnf.fr/ark:/12148/cb11933145f" => ['label' => 'plantes', 'count' => 995 ] + ]]); - $this->sparqlClient - ->shouldReceive('query') - ->with($query) - ->once() - ->andReturn(new \ArrayIterator([])); - $this->get('/api/v1/stats/themes/?filter=uri'); } - - public function testIndex() { + public function testIndexLimitIndex2() { - $this->sparqlClient - ->shouldReceive('query') + $query = [ + 'index' => 
env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => 9, + "order" => [ '_count' => 'desc' ], + "include" => ".*\|bnf\|.*" + ] + ] + ] + ] + ] + ] + ]; + + // This is not normally what should be received, but we test that the limit is respected + Es::shouldReceive('search') ->once() - ->andReturn(new \ArrayIterator([ - (object)['theme'=>new Resource('http://lexvo.org/id/iso639-3/gsw'), 'count' => Literal::create(44)], - (object)['theme'=>new Resource('http://ark.bnf.fr/ark:/12148/cb119339867'), 'count' => Literal::create(33)], - (object)['theme'=>Literal::create('Français', 'fr'), 'count' => Literal::create(22)], - ])); - $this->get('/api/v1/stats/themes/')->assertTrue($this->response->isOk(), $this->response->content()); + ->with($query) + ->andReturn([ + "took" => 27, + "timed_out" => false, + "_shards" => [ + "total" => 1, + "successful" => 1, + "failed" => 0 + ], + "hits" => [ + "total" => 3011, + "max_score" => 0.0, + "hits" => [ ] + ], + "aggregations" => [ + "subjects" => [ + "doc_count" => 41524, + "subjects" => [ + "doc_count_error_upper_bound" => 0, + "sum_other_doc_count" => 13678, + "buckets" => [ [ + "key" => "professions|bnf|ark:/12148/cb13318415c", + "doc_count" => 1412 + ], [ + "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s", + "doc_count" => 1092 + ], [ + "key" => "famille|bnf|ark:/12148/cb119339867", + "doc_count" => 1050 + ], [ + "key" => "oiseaux|bnf|ark:/12148/cb11932889r", + "doc_count" => 1003 + ], [ + "key" => "météorologie|bnf|ark:/12148/cb11932496x", + "doc_count" => 1001 + ], [ + "key" => "plantes|bnf|ark:/12148/cb11933145f", + "doc_count" => 995 + ], [ + "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q", + "doc_count" => 989 + ], [ + "key" => "arbres|bnf|ark:/12148/cb11934786x", + "doc_count" => 989 + ] ] + ] + ] + ] + ]); + 
$this->get('/api/v1/stats/themes/?limit=3&index=2')->assertTrue($this->response->isOk(), $this->response->content()); $this->seeJsonEquals(["themes" => [ - "http://lexvo.org/id/iso639-3/gsw" => ["label" => "alémanique", "count" => 44], - "http://ark.bnf.fr/ark:/12148/cb119339867" => ["label" => "famille", "count" => 33], - "Français" => ["label" => "Français", "count" => 22], + "http://ark.bnf.fr/ark:/12148/cb11930908q" => ['label' => 'animaux sauvages', 'count' => 989 ], + "http://ark.bnf.fr/ark:/12148/cb11934786x" => ['label' => 'arbres', 'count' => 989 ] ]]); } + }