make all stats views use elasticsearch. Make sure that the document discourse types are taken from the predefined list
author ymh <ymh.work@gmail.com>
Thu, 20 Oct 2016 17:27:36 +0200
changeset 377 52169c718513
parent 376 02f113d43f18
child 378 5b47eab083f3
make all stats views use elasticsearch. Make sure that the document discourse types are taken from the predefined list
server/src/app/Http/Controllers/Api/DateStatsController.php
server/src/app/Http/Controllers/Api/DiscourseController.php
server/src/app/Http/Controllers/Api/GeoStatsController.php
server/src/app/Http/Controllers/Api/LanguageController.php
server/src/app/Http/Controllers/Api/ThemeController.php
server/src/app/Models/Document.php
server/src/tests/Controllers/DateStatsControllerTest.php
server/src/tests/Controllers/DiscourseControllerTest.php
server/src/tests/Controllers/LanguageControllerTest.php
--- a/server/src/app/Http/Controllers/Api/DateStatsController.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DateStatsController.php	Thu Oct 20 17:27:36 2016 +0200
@@ -2,24 +2,15 @@
 
 namespace CorpusParole\Http\Controllers\Api;
 
-// use CorpusParole\Http\Requests;
 use Illuminate\Http\Request;
 use Log;
 use Es;
 
-use CorpusParole\Libraries\Sparql\SparqlClient;
-
 use CorpusParole\Http\Controllers\Controller;
 
 
 class DateStatsController extends Controller
 {
-    private $sparqlClient = null;
-
-    public function __construct(SparqlClient $sparqlClient) {
-        $this->sparqlClient = $sparqlClient;
-    }
-
     /**
      * Display the specified resource.
      *
--- a/server/src/app/Http/Controllers/Api/DiscourseController.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/DiscourseController.php	Thu Oct 20 17:27:36 2016 +0200
@@ -2,25 +2,14 @@
 
 namespace CorpusParole\Http\Controllers\Api;
 
-// use CorpusParole\Http\Requests;
 use CorpusParole\Http\Controllers\Controller;
 
 use Illuminate\Http\Request;
-use EasyRdf\Resource;
-use EasyRdf\Literal;
 
-use CorpusParole\Libraries\Sparql\SparqlClient;
-use CorpusParole\Services\LexvoResolverInterface;
-use CorpusParole\Services\BnfResolverInterface;
-
+use Es;
 
 class DiscourseController extends Controller
 {
-    private $sparqlClient = null;
-
-    public function __construct(SparqlClient $sparqlClient) {
-        $this->sparqlClient = $sparqlClient;
-    }
 
     /**
      * Display the specified resource.
@@ -30,21 +19,28 @@
     public function index(Request $request)
     {
 
-        $query =  preg_replace('/\s+/', ' ', "SELECT (?o AS ?res) (COUNT(?s) AS ?count) WHERE {
-            ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
-            ?s <http://purl.org/dc/elements/1.1/type> ?o.
-            FILTER(uri(?o) in (<".implode('>,<', array_keys(config('corpusparole.corpus_discourse_type'))).">))
-        }
-        GROUP BY ?o
-        ORDER BY DESC(?count)");
+        $query = [ "match_all" => []];
 
-        $docs = $this->sparqlClient->query($query);
+        $esQuery = [
+            'index' => env('ELASTICSEARCH_INDEX'),
+            'body' => [
+                "size" => 0,
+                "query" => $query,
+                "aggs" => [
+                    "discourses" => [
+                        "terms" => [ "field" => "discourse_types", "order" => [ "_count" => "desc" ], "size" => 0 ]
+                    ]
+                ]
+            ]
+        ];
+
+        $esRes = Es::search($esQuery);
 
         $discourses = [];
 
-        foreach ($docs as $row) {
-            $key = $row->res->getUri();
-            $count = $row->count->getValue();
+        foreach ($esRes['aggregations']['discourses']['buckets'] as $b) {
+            $key = $b['key'];
+            $count = $b['doc_count'];
             $label = config('corpusparole.corpus_discourse_type')[$key];
             $discourses[$key] = [
                 "label" => $label,
--- a/server/src/app/Http/Controllers/Api/GeoStatsController.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/GeoStatsController.php	Thu Oct 20 17:27:36 2016 +0200
@@ -4,7 +4,6 @@
 
 use Illuminate\Http\Request;
 
-use CorpusParole\Http\Requests;
 use CorpusParole\Http\Controllers\Controller;
 use Es;
 use Log;
--- a/server/src/app/Http/Controllers/Api/LanguageController.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/LanguageController.php	Thu Oct 20 17:27:36 2016 +0200
@@ -2,41 +2,42 @@
 
 namespace CorpusParole\Http\Controllers\Api;
 
-// use Illuminate\Http\Request;
-// use CorpusParole\Http\Requests;
+use Illuminate\Http\Request;
 use CorpusParole\Http\Controllers\Controller;
 
-use CorpusParole\Libraries\Sparql\SparqlClient;
-
+use Es;
 
 class LanguageController extends Controller
 {
-    private $sparqlClient = null;
-
-    public function __construct(SparqlClient $sparqlClient) {
-        $this->sparqlClient = $sparqlClient;
-    }
-
     /**
      * Display the specified resource.
      *
      * @return \Illuminate\Http\Response
      */
-    public function index()
+    public function index(Request $request)
     {
 
-        $query = "select ?lang (count(?lang) as ?count) where {
-            ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
-            ?s <http://purl.org/dc/elements/1.1/language> ?lang
-        }
-        GROUP BY ?lang
-        ORDER BY DESC(?count)";
+        $query = [ "match_all" => []];
 
-        $docs = $this->sparqlClient->query($query);
+        $esQuery = [
+            'index' => env('ELASTICSEARCH_INDEX'),
+            'body' => [
+                "size" => 0,
+                "query" => $query,
+                "aggs" => [
+                    "languages" => [
+                        "terms" => [ "field" => "language", "order" => [ "_count" => "desc" ], "size" => 0 ]
+                    ]
+                ]
+            ]
+        ];
+
+        $esRes = Es::search($esQuery);
 
         $languages = [];
-        foreach ($docs as $row) {
-            $languages[$row->lang->getUri()] = $row->count->getValue();
+
+        foreach ($esRes['aggregations']['languages']['buckets'] as $b) {
+            $languages[$b['key']] = $b['doc_count'];
         }
 
         return response()->json(['languages' => $languages ]);
--- a/server/src/app/Http/Controllers/Api/ThemeController.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/ThemeController.php	Thu Oct 20 17:27:36 2016 +0200
@@ -13,17 +13,13 @@
 use Es;
 use Log;
 
-use CorpusParole\Libraries\Sparql\SparqlClient;
 use CorpusParole\Services\LexvoResolverInterface;
 use CorpusParole\Services\BnfResolverInterface;
 
 
 class ThemeController extends Controller
 {
-    private $sparqlClient = null;
-
-    public function __construct(SparqlClient $sparqlClient, LexvoResolverInterface $lexvoResolver, BnfResolverInterface $bnfResolver) {
-        $this->sparqlClient = $sparqlClient;
+    public function __construct(LexvoResolverInterface $lexvoResolver, BnfResolverInterface $bnfResolver) {
         $this->lexvoResolver = $lexvoResolver;
         $this->bnfResolver = $bnfResolver;
     }
--- a/server/src/app/Models/Document.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/app/Models/Document.php	Thu Oct 20 17:27:36 2016 +0200
@@ -100,13 +100,17 @@
 
     public function getDiscourseTypes() {
         return array_values(array_filter($this->getTypes(), function($v) {
-            return $v instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $v->getUri());
+            return $v instanceof Resource
+                && preg_match(config('corpusparole.bnf_ark_url_regexp'), $v->getUri())
+                && array_key_exists($v->getUri(), config('corpusparole.corpus_discourse_type'));
         }));
     }
 
     public function getOtherTypes() {
         $res = array_values(array_filter($this->getTypes(), function($v) {
-            return !($v instanceof Resource) || !preg_match(config('corpusparole.bnf_ark_url_regexp'), $v->getUri());
+            return !($v instanceof Resource)
+                || !preg_match(config('corpusparole.bnf_ark_url_regexp'), $v->getUri())
+                || !array_key_exists($v->getUri(), config('corpusparole.corpus_discourse_type'));
         }));
         return $res;
     }
--- a/server/src/tests/Controllers/DateStatsControllerTest.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/tests/Controllers/DateStatsControllerTest.php	Thu Oct 20 17:27:36 2016 +0200
@@ -8,13 +8,11 @@
 
 class DateStatsControllerTest extends TestCase
 {
-    private $sparqlClient;
-
     const ES_QUERY = [
         'index' => 'corpus',
         'body' => [
             "size" => 0,
-            "query" => [ "match_all" => []],
+            "query" => [ "match_all" => [] ],
             "aggs" => [
                 "datestats" => [
                     "nested"=> [
--- a/server/src/tests/Controllers/DiscourseControllerTest.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/tests/Controllers/DiscourseControllerTest.php	Thu Oct 20 17:27:36 2016 +0200
@@ -2,24 +2,28 @@
 
 use Mockery as m;
 
-use EasyRdf\Resource;
-use EasyRdf\Literal;
-
 /**
  *
  */
 class DiscourseControllerTest extends TestCase {
 
-    private $sparqlClient;
+    const ES_QUERY = [
+        'index' => 'corpus',
+        'body' => [
+            "size" => 0,
+            "query" => [ "match_all" => [] ],
+            "aggs" => [
+                "discourses" => [
+                    "terms" => [ "field" => "discourse_types", "order" => [ "_count" => "desc" ], "size" => 0 ]
+                ]
+            ]
+        ]
+    ];
 
     public function setUp() {
 
         parent::setup();
 
-        // create a mock of the post repository interface and inject it into the
-        // IoC container
-        $this->sparqlClient = m::mock('CorpusParole\Libraries\Sparql\SparqlClient');
-        $this->app->instance('CorpusParole\Libraries\Sparql\SparqlClient', $this->sparqlClient);
     }
 
     public function tearDown() {
@@ -27,39 +31,79 @@
         parent::tearDown();
     }
 
-    public function testIndexQuery() {
-
-        $query = preg_replace('/\s+/', ' ', "SELECT (?o AS ?res) (COUNT(?s) AS ?count) WHERE {
-            ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
-            ?s <http://purl.org/dc/elements/1.1/type> ?o.
-            FILTER(uri(?o) in (<".implode('>,<', array_keys(config('corpusparole.corpus_discourse_type'))).">))
-        }
-        GROUP BY ?o
-        ORDER BY DESC(?count)");
-
-        $this->sparqlClient
-            ->shouldReceive('query')
-            ->with($query)
-            ->once()
-            ->andReturn(new \ArrayIterator([]));
-        $this->get('/api/v1/stats/discourses/');
-    }
-
     public function testIndex() {
 
-        $this->sparqlClient
-            ->shouldReceive('query')
-            ->once()
-            ->andReturn(new \ArrayIterator([
-                (object)['res'=>new Resource('http://ark.bnf.fr/ark:/12148/cb12083158d'), 'count' => Literal::create(44)],
-                (object)['res'=>new Resource('http://ark.bnf.fr/ark:/12148/cb119783362'), 'count' => Literal::create(33)],
-                (object)['res'=>new Resource('http://ark.bnf.fr/ark:/12148/cb13319048g'), 'count' => Literal::create(22)],
-            ]));
+        Es::shouldReceive('search')
+                ->once()
+                ->with(self::ES_QUERY)
+                ->andReturn(json_decode('{
+  "took" : 116,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : 3373,
+    "max_score" : 0.0,
+    "hits" : [ ]
+  },
+  "aggregations" : {
+    "discourses" : {
+      "doc_count_error_upper_bound" : 0,
+      "sum_other_doc_count" : 0,
+      "buckets" : [ {
+        "key" : "http://ark.bnf.fr/ark:/12148/cb12083158d",
+        "doc_count" : 44
+      }, {
+        "key" : "http://ark.bnf.fr/ark:/12148/cb119783362",
+        "doc_count" : 33
+      }, {
+        "key" : "http://ark.bnf.fr/ark:/12148/cb13319048g",
+        "doc_count" : 22
+      } ]
+    }
+  }
+}', true));
+
         $this->get('/api/v1/stats/discourses/')->assertTrue($this->response->isOk(), $this->response->content());
+
         $this->seeJsonEquals(["discourses" => [
             "http://ark.bnf.fr/ark:/12148/cb12083158d" => ["label" => "argumentation", "count" => 44],
             "http://ark.bnf.fr/ark:/12148/cb119783362" => ["label" => "bavardage", "count" => 33],
             "http://ark.bnf.fr/ark:/12148/cb13319048g" => ["label" => "chansons", "count" => 22],
         ]]);
+
+    }
+
+    public function testIndexQuery() {
+        Es::shouldReceive('search')
+                ->once()
+                ->with(self::ES_QUERY)
+                ->andReturn(json_decode('{
+  "took" : 116,
+  "timed_out" : false,
+  "_shards" : {
+    "total" : 1,
+    "successful" : 1,
+    "failed" : 0
+  },
+  "hits" : {
+    "total" : 3373,
+    "max_score" : 0.0,
+    "hits" : [ ]
+  },
+  "aggregations" : {
+    "discourses" : {
+      "doc_count_error_upper_bound" : 0,
+      "sum_other_doc_count" : 0,
+      "buckets" : [ ]
+    }
+  }
+}', true));
+        $this->get('/api/v1/stats/discourses/')->assertTrue($this->response->isOk(), $this->response->content());
+        $this->seeJsonEquals(["discourses" => [
+        ]]);
     }
 }
--- a/server/src/tests/Controllers/LanguageControllerTest.php	Thu Oct 20 15:09:31 2016 +0200
+++ b/server/src/tests/Controllers/LanguageControllerTest.php	Thu Oct 20 17:27:36 2016 +0200
@@ -11,16 +11,21 @@
  */
 class LanguageControllerTest extends TestCase {
 
-    private $sparqlClient;
+    const ES_QUERY = [
+        'index' => 'corpus',
+        'body' => [
+            "size" => 0,
+            "query" => [ "match_all" => [] ],
+            "aggs" => [
+                "languages" => [
+                    "terms" => [ "field" => "language", "order" => [ "_count" => "desc" ], "size" => 0 ]
+                ]
+            ]
+        ]
+    ];
 
     public function setUp() {
-
         parent::setup();
-
-        // create a mock of the post repository interface and inject it into the
-        // IoC container
-        $this->sparqlClient = m::mock('CorpusParole\Libraries\Sparql\SparqlClient');
-        $this->app->instance('CorpusParole\Libraries\Sparql\SparqlClient', $this->sparqlClient);
     }
 
     public function tearDown() {
@@ -29,27 +34,46 @@
     }
 
     public function testIndex() {
-        $query = "select ?lang (count(?lang) as ?count) where {
-            ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
-            ?s <http://purl.org/dc/elements/1.1/language> ?lang
-        }
-        GROUP BY ?lang
-        ORDER BY DESC(?count)";
 
-        $this->sparqlClient
-            ->shouldReceive('query')
-            ->with($query)
-            ->once()
-            ->andReturn(new \ArrayIterator([
-                (object)['lang'=>new Resource('http://lexvo.org/id/iso639-3/gsw'), 'count' => Literal::create(44)],
-                (object)['lang'=>new Resource('http://lexvo.org/id/iso639-3/fra'), 'count' => Literal::create(33)],
-                (object)['lang'=>new Resource('http://lexvo.org/id/iso639-3/bre'), 'count' => Literal::create(22)],
-            ]));
-        $response = $this->get('/api/v1/stats/languages/')->
-            seeJsonEquals(['languages' => [
-                'http://lexvo.org/id/iso639-3/gsw' => 44,
-                'http://lexvo.org/id/iso639-3/fra' => 33,
-                'http://lexvo.org/id/iso639-3/bre' => 22,
+        Es::shouldReceive('search')
+                ->once()
+                ->with(self::ES_QUERY)
+                ->andReturn(json_decode('{
+    "took": 92,
+    "timed_out": false,
+    "_shards": {
+        "total": 1,
+        "successful": 1,
+        "failed": 0
+    },
+    "hits": {
+        "total": 3373,
+        "max_score": 0.0,
+        "hits": []
+    },
+    "aggregations": {
+        "languages": {
+            "doc_count_error_upper_bound": 0,
+            "sum_other_doc_count": 0,
+            "buckets": [{
+                "key": "http://lexvo.org/id/iso639-3/fra",
+                "doc_count": 1669
+            }, {
+                "key": "http://lexvo.org/id/iso639-3/gsw",
+                "doc_count": 851
+            }, {
+                "key": "http://lexvo.org/id/iso639-3/bre",
+                "doc_count": 403
+            }]
+        }
+    }
+}', true));
+
+        $response = $this->get('/api/v1/stats/languages/')->assertTrue($this->response->isOk(), $this->response->content());
+        $this->seeJsonEquals(['languages' => [
+                'http://lexvo.org/id/iso639-3/fra' => 1669,
+                'http://lexvo.org/id/iso639-3/gsw' => 851,
+                'http://lexvo.org/id/iso639-3/bre' => 403,
             ]]);
     }
 }