--- a/server/src/app/Console/Commands/IndexDocuments.php Wed Oct 05 02:31:25 2016 +0200
+++ b/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 06 11:14:32 2016 +0200
@@ -4,6 +4,7 @@
use Illuminate\Console\Command;
use EasyRdf\Resource;
+use EasyRdf\Literal;
use GuzzleHttp\Client;
use CorpusParole\Libraries\Utils;
@@ -11,6 +12,7 @@
use CorpusParole\Libraries\CocoonUtils;
use CorpusParole\Models\GeonamesHierarchy;
use CorpusParole\Services\BnfResolverInterface;
+use CorpusParole\Services\LexvoResolverInterface;
use Es;
class IndexDocuments extends Command
@@ -24,7 +26,8 @@
protected $signature = 'corpus-parole:indexDocuments
{--limit=0 : index only the first n documents, 0 (default) means index everything }
{--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
- {--step-size=100 : number of documents to retrieve from repository at a time before indexing}';
+ {--step-size=100 : number of documents to retrieve from repository at a time before indexing}
+ {--reset-geo-cache : reset geo cache before indexing}';
/**
* The console command description.
@@ -38,10 +41,15 @@
*
* @return void
*/
- public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver)
+ public function __construct(
+ DocumentRepository $documentRepository,
+ Client $httpClient,
+ BnfResolverInterface $bnfResolver,
+ LexvoResolverInterface $lexvoResolver)
{
$this->documentRepository = $documentRepository;
$this->bnfResolver = $bnfResolver;
+ $this->lexvoResolver = $lexvoResolver;
$this->httpClient = $httpClient;
parent::__construct();
}
@@ -53,8 +61,12 @@
*
* @return int (1 if sucess, 0 if error)
*/
- private function resetIndex()
+ private function resetIndex($resetGeoCache)
{
+ if($resetGeoCache) {
+ // delete all rows in GeonamesHierarchy
+ GeonamesHierarchy::getQuery()->delete();
+ }
$indexParams = [
'index' => env('ELASTICSEARCH_INDEX')
];
@@ -126,7 +138,7 @@
]
)->getBody();
$hjson = json_decode($apiBody);
- $hcache = new GeonamesHierarchy;
+ $hcache = new GeonamesHierarchy();
$hcache->geonamesid = $geonamesid;
$hcache->hierarchy = $hjson;
$hcache->save();
@@ -173,20 +185,58 @@
private function getSubjects($doc) {
$sres = array_reduce($doc->getSubjects(), function($res, $s) {
- $m = [];
- if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) {
+ $mBnf = [];
+ $mLexvo = [];
+
+ if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) {
array_push($res, [
- 'uri' => $m[0],
- 'code' => $m[1]
+ 'uri' => $mBnf[0],
+ 'code' => $mBnf[1],
+ 'type' => 'bnf'
+ ]);
+ } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) {
+ array_push($res, [
+ 'uri' => $mLexvo[0],
+ 'code' => $mLexvo[1],
+ 'type' => 'lxv'
]);
}
+
return $res;
}, []);
- $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres)));
+ $labelsBnf = $this->bnfResolver->getLabels(
+ array_unique(array_reduce(
+ $sres,
+ function($r, $so) {
+ if($so['type'] === 'bnf') {
+ array_push($r, $so['uri']);
+ }
+ return $r;
+ },[]
+ ))
+ );
+ $labelsLexvo = $this->lexvoResolver->getLabels(
+ array_unique(array_reduce(
+ $sres,
+ function($r, $so) {
+ if($so['type'] === 'lxv') {
+ array_push($r, $so['uri']);
+ }
+ return $r;
+ },[]
+ ))
+ );
- return array_map(function($so) use ($labels) { return [ 'label' => $labels[$so['uri']], 'code' => $so['code'], 'label_code' => $labels[$so['uri']]."|".$so['code'] ]; }, $sres);
-
+ return array_map(function($so) use ($labelsBnf, $labelsLexvo) {
+ $label = $so['uri'];
+ if($so['type'] === 'bnf') {
+ $label = $labelsBnf[$label];
+ } elseif ($so['type'] === 'lxv') {
+ $label = $labelsLexvo[$label];
+ }
+ return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['type']."|".$so['code'] ]; }, $sres
+ );
}
/**
@@ -261,8 +311,9 @@
$stepSize = $this->option('step-size');
$this->comment(' - Indexing with step size of '.$stepSize);
+ $resetGeoCache = $this->option('reset-geo-cache', false);
$this->info('Resetting index...');
- $success = $this->resetIndex();
+ $success = $this->resetIndex($resetGeoCache);
if($success==1){
$this->comment('Index reset!');
}
--- a/server/src/tests/Controllers/ThemeControllerTest.php Wed Oct 05 02:31:25 2016 +0200
+++ b/server/src/tests/Controllers/ThemeControllerTest.php Thu Oct 06 11:14:32 2016 +0200
@@ -10,16 +10,10 @@
*/
class ThemeControllerTest extends TestCase {
- private $sparqlClient;
-
public function setUp() {
parent::setup();
- // create a mock of the post repository interface and inject it into the
- // IoC container
- $this->sparqlClient = m::mock('CorpusParole\Libraries\Sparql\SparqlClient');
- $this->app->instance('CorpusParole\Libraries\Sparql\SparqlClient', $this->sparqlClient);
}
public function tearDown() {
@@ -27,130 +21,602 @@
parent::tearDown();
}
- public function testIndexQuery() {
+ public function testIndex() {
+
+ $query = [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => config('corpusparole.theme_default_limit'),
+ "order" => [ '_count' => 'desc' ],
+ "include" => ".*\|bnf\|.*"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
- $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- FILTER (isIRI(?o) && regex(str(?o), '^".config('corpusparole.bnf_ark_base_url')."')) .
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
+ Es::shouldReceive('search')
+ ->once()
+ ->with($query)
+ ->andReturn([
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ [
+ "key" => "professions|bnf|ark:/12148/cb13318415c",
+ "doc_count" => 1412
+ ], [
+ "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s",
+ "doc_count" => 1092
+ ], [
+ "key" => "famille|bnf|ark:/12148/cb119339867",
+ "doc_count" => 1050
+ ], [
+ "key" => "oiseaux|bnf|ark:/12148/cb11932889r",
+ "doc_count" => 1003
+ ], [
+ "key" => "météorologie|bnf|ark:/12148/cb11932496x",
+ "doc_count" => 1001
+ ], [
+ "key" => "plantes|bnf|ark:/12148/cb11933145f",
+ "doc_count" => 995
+ ], [
+ "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q",
+ "doc_count" => 989
+ ], [
+ "key" => "arbres|bnf|ark:/12148/cb11934786x",
+ "doc_count" => 989
+ ] ]
+ ]
+ ]
+ ]
+ ]);
+ $this->get('/api/v1/stats/themes/')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["themes" => [
+ "http://ark.bnf.fr/ark:/12148/cb13318415c" => ['label' => 'professions', 'count' => 1412 ],
+ "http://ark.bnf.fr/ark:/12148/cb16604691s" => ['label' => 'travail non rémunéré', 'count' => 1092 ],
+ "http://ark.bnf.fr/ark:/12148/cb119339867" => ['label' => 'famille', 'count' => 1050 ],
+ "http://ark.bnf.fr/ark:/12148/cb11932889r" => ['label' => 'oiseaux', 'count' => 1003 ],
+ "http://ark.bnf.fr/ark:/12148/cb11932496x" => ['label' => 'météorologie', 'count' => 1001 ],
+ "http://ark.bnf.fr/ark:/12148/cb11933145f" => ['label' => 'plantes', 'count' => 995 ],
+ "http://ark.bnf.fr/ark:/12148/cb11930908q" => ['label' => 'animaux sauvages', 'count' => 989 ],
+ "http://ark.bnf.fr/ark:/12148/cb11934786x" => ['label' => 'arbres', 'count' => 989 ]
+ ]]);
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
- ->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/themes/');
}
- public function testIndexQueryBnf() {
+ public function testIndexAll() {
+
+ $query = [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => config('corpusparole.theme_default_limit'),
+ "order" => [ '_count' => 'desc' ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
- $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- FILTER (isIRI(?o) && regex(str(?o), '^".config('corpusparole.bnf_ark_base_url')."')) .
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
+ Es::shouldReceive('search')
+ ->once()
+ ->with($query)
+ ->andReturn([
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ [
+ "key" => "professions|bnf|ark:/12148/cb13318415c",
+ "doc_count" => 1412
+ ], [
+ "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s",
+ "doc_count" => 1092
+ ], [
+ "key" => "famille|bnf|ark:/12148/cb119339867",
+ "doc_count" => 1050
+ ], [
+ "key" => "oiseaux|bnf|ark:/12148/cb11932889r",
+ "doc_count" => 1003
+ ], [
+ "key" => "Français|lxv|fra",
+ "doc_count" => 1002
+ ], [
+ "key" => "météorologie|bnf|ark:/12148/cb11932496x",
+ "doc_count" => 1001
+ ], [
+ "key" => "plantes|bnf|ark:/12148/cb11933145f",
+ "doc_count" => 995
+ ], [
+ "key" => "Breton|lxv|bzh",
+ "doc_count" => 992
+ ], [
+ "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q",
+ "doc_count" => 989
+ ], [
+ "key" => "arbres|bnf|ark:/12148/cb11934786x",
+ "doc_count" => 989
+ ] ]
+ ]
+ ]
+ ]
+ ]);
+ $this->get('/api/v1/stats/themes/?filter=all')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["themes" => [
+ "http://ark.bnf.fr/ark:/12148/cb13318415c" => ['label' => 'professions', 'count' => 1412 ],
+ "http://ark.bnf.fr/ark:/12148/cb16604691s" => ['label' => 'travail non rémunéré', 'count' => 1092 ],
+ "http://ark.bnf.fr/ark:/12148/cb119339867" => ['label' => 'famille', 'count' => 1050 ],
+ "http://ark.bnf.fr/ark:/12148/cb11932889r" => ['label' => 'oiseaux', 'count' => 1003 ],
+ "http://lexvo.org/id/iso639-3/fra" => ['label' => 'Français', 'count' => 1002 ],
+ "http://ark.bnf.fr/ark:/12148/cb11932496x" => ['label' => 'météorologie', 'count' => 1001 ],
+ "http://ark.bnf.fr/ark:/12148/cb11933145f" => ['label' => 'plantes', 'count' => 995 ],
+ "http://lexvo.org/id/iso639-3/bzh" => ['label' => 'Breton', 'count' => 992 ],
+ "http://ark.bnf.fr/ark:/12148/cb11930908q" => ['label' => 'animaux sauvages', 'count' => 989 ],
+ "http://ark.bnf.fr/ark:/12148/cb11934786x" => ['label' => 'arbres', 'count' => 989 ]
+ ]]);
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
- ->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/themes/?filter=bnf');
}
+ public function testIndexLexvo() {
- public function testIndexQueryAll() {
+ $query = [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => config('corpusparole.theme_default_limit'),
+ "order" => [ '_count' => 'desc' ],
+ "include" => ".*\|lxv\|.*"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+
+ Es::shouldReceive('search')
+ ->once()
+ ->with($query)
+ ->andReturn([
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ [
+ "key" => "Français|lxv|fra",
+ "doc_count" => 1002
+ ], [
+ "key" => "Breton|lxv|bzh",
+ "doc_count" => 992
+ ] ]
+ ]
+ ]
+ ]
+ ]);
+ $this->get('/api/v1/stats/themes/?filter=lexvo')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["themes" => [
+ "http://lexvo.org/id/iso639-3/fra" => ['label' => 'Français', 'count' => 1002 ],
+ "http://lexvo.org/id/iso639-3/bzh" => ['label' => 'Breton', 'count' => 992 ]
+ ]]);
+
+ }
- $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
+ private function getOrderQuery($order) {
+ return [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => config('corpusparole.theme_default_limit'),
+ "order" => $order,
+ "include" => ".*\|bnf\|.*"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+
+ }
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
+ private function getRespSort() {
+ return [
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ ]
+ ]
+ ]
+ ]
+ ];
+ }
+
+ public function testIndexSort() {
+
+ Es::shouldReceive('search')
->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/themes/?filter=all');
+ ->with($this->getOrderQuery(['_count' => 'desc']))
+ ->andReturn($this->getRespSort());
+ $this->get('/api/v1/stats/themes/')->assertTrue($this->response->isOk(), $this->response->content());
+
+ }
+
+ public function testIndexSortReverseCount() {
+ Es::shouldReceive('search')
+ ->once()
+ ->with($this->getOrderQuery(['_count' => 'asc']))
+ ->andReturn($this->getRespSort());
+ $this->get('/api/v1/stats/themes/?sort=-count')->assertTrue($this->response->isOk(), $this->response->content());
+ }
+
+ public function testIndexSortLabel() {
+ Es::shouldReceive('search')
+ ->once()
+ ->with($this->getOrderQuery(['_term' => 'asc']))
+ ->andReturn($this->getRespSort());
+ $this->get('/api/v1/stats/themes/?sort=label')->assertTrue($this->response->isOk(), $this->response->content());
+ }
+
+ public function testIndexSortAlphabetical() {
+ Es::shouldReceive('search')
+ ->once()
+ ->with($this->getOrderQuery(['_term' => 'asc']))
+ ->andReturn($this->getRespSort());
+ $this->get('/api/v1/stats/themes/?sort=alphabetical')->assertTrue($this->response->isOk(), $this->response->content());
+ }
+
+ public function testIndexSortLabelReverse() {
+ Es::shouldReceive('search')
+ ->once()
+ ->with($this->getOrderQuery(['_term' => 'desc']))
+ ->andReturn($this->getRespSort());
+ $this->get('/api/v1/stats/themes/?sort=-label')->assertTrue($this->response->isOk(), $this->response->content());
}
- public function testIndexQueryNone() {
+ public function testIndexLimit() {
- $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
- ->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/themes/?filter=none');
- }
+ $query = [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => 3,
+ "order" => [ '_count' => 'desc' ],
+ "include" => ".*\|bnf\|.*"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
-
- public function testIndexQueryEmpty() {
+ // This is not normally what should be received, but we test that the limit is respected
+ Es::shouldReceive('search')
+ ->once()
+ ->with($query)
+ ->andReturn([
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ [
+ "key" => "professions|bnf|ark:/12148/cb13318415c",
+ "doc_count" => 1412
+ ], [
+ "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s",
+ "doc_count" => 1092
+ ], [
+ "key" => "famille|bnf|ark:/12148/cb119339867",
+ "doc_count" => 1050
+ ], [
+ "key" => "oiseaux|bnf|ark:/12148/cb11932889r",
+ "doc_count" => 1003
+ ], [
+ "key" => "météorologie|bnf|ark:/12148/cb11932496x",
+ "doc_count" => 1001
+ ], [
+ "key" => "plantes|bnf|ark:/12148/cb11933145f",
+ "doc_count" => 995
+ ], [
+ "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q",
+ "doc_count" => 989
+ ], [
+ "key" => "arbres|bnf|ark:/12148/cb11934786x",
+ "doc_count" => 989
+ ] ]
+ ]
+ ]
+ ]
+ ]);
+ $this->get('/api/v1/stats/themes/?limit=3')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["themes" => [
+ "http://ark.bnf.fr/ark:/12148/cb13318415c" => ['label' => 'professions', 'count' => 1412 ],
+ "http://ark.bnf.fr/ark:/12148/cb16604691s" => ['label' => 'travail non rémunéré', 'count' => 1092 ],
+ "http://ark.bnf.fr/ark:/12148/cb119339867" => ['label' => 'famille', 'count' => 1050 ]
+ ]]);
- $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
- ->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/themes/?filter=');
}
- public function testIndexQueryUri() {
+ public function testIndexIndex() {
+
+ $query = [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => 6,
+ "order" => [ '_count' => 'desc' ],
+ "include" => ".*\|bnf\|.*"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
- $query = preg_replace('/\s+/', ' ', "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- FILTER isIRI(?o) .
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
+ // This is not normally what should be received, but we test that the limit is respected
+ Es::shouldReceive('search')
+ ->once()
+ ->with($query)
+ ->andReturn([
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ [
+ "key" => "professions|bnf|ark:/12148/cb13318415c",
+ "doc_count" => 1412
+ ], [
+ "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s",
+ "doc_count" => 1092
+ ], [
+ "key" => "famille|bnf|ark:/12148/cb119339867",
+ "doc_count" => 1050
+ ], [
+ "key" => "oiseaux|bnf|ark:/12148/cb11932889r",
+ "doc_count" => 1003
+ ], [
+ "key" => "météorologie|bnf|ark:/12148/cb11932496x",
+ "doc_count" => 1001
+ ], [
+ "key" => "plantes|bnf|ark:/12148/cb11933145f",
+ "doc_count" => 995
+ ], [
+ "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q",
+ "doc_count" => 989
+ ], [
+ "key" => "arbres|bnf|ark:/12148/cb11934786x",
+ "doc_count" => 989
+ ] ]
+ ]
+ ]
+ ]
+ ]);
+ $this->get('/api/v1/stats/themes/?limit=3&index=1')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["themes" => [
+ "http://ark.bnf.fr/ark:/12148/cb11932889r" => ['label' => 'oiseaux', 'count' => 1003 ],
+ "http://ark.bnf.fr/ark:/12148/cb11932496x" => ['label' => 'météorologie', 'count' => 1001 ],
+ "http://ark.bnf.fr/ark:/12148/cb11933145f" => ['label' => 'plantes', 'count' => 995 ]
+ ]]);
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
- ->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/themes/?filter=uri');
}
-
- public function testIndex() {
+ public function testIndexLimitIndex2() {
- $this->sparqlClient
- ->shouldReceive('query')
+ $query = [
+ 'index' => env('ELASTICSEARCH_INDEX'),
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => 9,
+ "order" => [ '_count' => 'desc' ],
+ "include" => ".*\|bnf\|.*"
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+
+ // This is not normally what should be received, but we test that the limit is respected
+ Es::shouldReceive('search')
->once()
- ->andReturn(new \ArrayIterator([
- (object)['theme'=>new Resource('http://lexvo.org/id/iso639-3/gsw'), 'count' => Literal::create(44)],
- (object)['theme'=>new Resource('http://ark.bnf.fr/ark:/12148/cb119339867'), 'count' => Literal::create(33)],
- (object)['theme'=>Literal::create('Français', 'fr'), 'count' => Literal::create(22)],
- ]));
- $this->get('/api/v1/stats/themes/')->assertTrue($this->response->isOk(), $this->response->content());
+ ->with($query)
+ ->andReturn([
+ "took" => 27,
+ "timed_out" => false,
+ "_shards" => [
+ "total" => 1,
+ "successful" => 1,
+ "failed" => 0
+ ],
+ "hits" => [
+ "total" => 3011,
+ "max_score" => 0.0,
+ "hits" => [ ]
+ ],
+ "aggregations" => [
+ "subjects" => [
+ "doc_count" => 41524,
+ "subjects" => [
+ "doc_count_error_upper_bound" => 0,
+ "sum_other_doc_count" => 13678,
+ "buckets" => [ [
+ "key" => "professions|bnf|ark:/12148/cb13318415c",
+ "doc_count" => 1412
+ ], [
+ "key" => "travail non rémunéré|bnf|ark:/12148/cb16604691s",
+ "doc_count" => 1092
+ ], [
+ "key" => "famille|bnf|ark:/12148/cb119339867",
+ "doc_count" => 1050
+ ], [
+ "key" => "oiseaux|bnf|ark:/12148/cb11932889r",
+ "doc_count" => 1003
+ ], [
+ "key" => "météorologie|bnf|ark:/12148/cb11932496x",
+ "doc_count" => 1001
+ ], [
+ "key" => "plantes|bnf|ark:/12148/cb11933145f",
+ "doc_count" => 995
+ ], [
+ "key" => "animaux sauvages|bnf|ark:/12148/cb11930908q",
+ "doc_count" => 989
+ ], [
+ "key" => "arbres|bnf|ark:/12148/cb11934786x",
+ "doc_count" => 989
+ ] ]
+ ]
+ ]
+ ]
+ ]);
+ $this->get('/api/v1/stats/themes/?limit=3&index=2')->assertTrue($this->response->isOk(), $this->response->content());
$this->seeJsonEquals(["themes" => [
- "http://lexvo.org/id/iso639-3/gsw" => ["label" => "alémanique", "count" => 44],
- "http://ark.bnf.fr/ark:/12148/cb119339867" => ["label" => "famille", "count" => 33],
- "Français" => ["label" => "Français", "count" => 22],
+ "http://ark.bnf.fr/ark:/12148/cb11930908q" => ['label' => 'animaux sauvages', 'count' => 989 ],
+ "http://ark.bnf.fr/ark:/12148/cb11934786x" => ['label' => 'arbres', 'count' => 989 ]
]]);
}
+
}