--- a/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 20 12:56:24 2016 +0530
+++ b/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 20 11:24:45 2016 +0200
@@ -109,6 +109,13 @@
'creation_date' => ['type' => 'date', 'index' => 'not_analyzed'],
'language' => ['type' => 'string', 'index' => 'not_analyzed'],
'discourse_types' => ['type' => 'string', 'index' => 'not_analyzed'],
+ 'creation_years' => [ // nested list of year/weight pairs; the indexed value comes from getCreationYears()
+ 'type' => 'nested',
+ 'properties' => [
+ 'year' => [ 'type' => 'short', 'index' => 'not_analyzed'], // calendar year as an integer
+ 'weight' => [ 'type' => 'float', 'index' => 'not_analyzed'], // fraction of the document attributed to this year (1 / number of years)
+ ]
+ ] ,
'subject' => [
'type' => 'nested',
'properties' => [
@@ -405,7 +412,7 @@
return $date;
}
- private function processPeriod($periodStr) {
+ private function processPeriod($periodStr, $asDate=false) {
$start = null;
$end = null;
foreach(explode(";", $periodStr) as $elem) {
@@ -436,18 +443,59 @@
return null;
}
- return array_map(function($y) {
- return \DateTime::createFromFormat("Y", "$y")->format(\DateTime::W3C);
+ return array_map(function($y) use ($asDate){
+ $date = \DateTime::createFromFormat("Y", "$y");
+ if($asDate) {
+ return $date;
+ } else {
+ return $date->format(\DateTime::W3C);
+ }
+
}, range($start, $end));
}
- private function processDate($dateStr) {
+ private function processDate($dateStr, $asDate=false) { // returns null when extractDate() yields null; $asDate selects the return form
$date = $this->extractDate($dateStr);
if(is_null($date)) {
return null;
} else {
- return $date->format(\DateTime::W3C);
+ if($asDate) {
+ return $date; // caller wants the date object itself
+ } else {
+ return $date->format(\DateTime::W3C); // default: W3C-formatted string, as before $asDate existed
+ }
+
+ }
+ }
+ // Builds the 'creation_years' entries for $doc: one ['year', 'weight'] pair per year of its creation date. Returns [] when that date is missing, has another datatype, or cannot be parsed.
+ private function getCreationYears($doc) {
+ $created = $doc->getCreated();
+ if(is_null($created)) {
+ return [];
}
+ $dateType = $created->getDatatypeUri();
+ $dates = null;
+ // A Period value yields one date per year of its range; a W3CDTF value yields a single date.
+ if($dateType === "http://purl.org/dc/terms/Period") {
+ $dates = $this->processPeriod($created->getValue(), true);
+ }
+ elseif($dateType === "http://purl.org/dc/terms/W3CDTF") {
+ $dates = $this->processDate($created->getValue(), true);
+ if(!is_null($dates)) {
+ $dates = [ $dates, ]; // wrap the single date so both branches produce a list
+ }
+ }
+ if(is_null($dates)) { // unhandled datatype, or the value could not be parsed
+ return [];
+ }
+ $count = count($dates);
+ return array_map(function($d) use ($count) {
+ return [
+ 'year' => intval($d->format("Y")),
+ 'weight' => 1/$count // equal share per year, so the weights of one document sum to 1
+ ];
+
+ }, $dates);
}
private function getDiscourseTypes($doc) {
@@ -472,6 +520,7 @@
'date' => (string)$doc->getModified(),
'location' => $this->getLocation($doc),
'creation_date' => $this->getCreationDate($doc),
+ 'creation_years' => $this->getCreationYears($doc),
'language' => $doc->getLanguagesValue(),
'discourse_types' => $this->getDiscourseTypes($doc),
'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
--- a/server/src/tests/Controllers/DateStatsControllerTest.php Thu Oct 20 12:56:24 2016 +0530
+++ b/server/src/tests/Controllers/DateStatsControllerTest.php Thu Oct 20 11:24:45 2016 +0200
@@ -2,20 +2,50 @@
use Mockery as m;
+use Es;
+
use EasyRdf\Literal;
class DateStatsControllerTest extends TestCase
{
private $sparqlClient;
+ const ES_QUERY = [ // exact argument both tests expect Es::search() to receive
+ 'index' => 'corpus',
+ 'body' => [
+ "size" => 0, // only the aggregations are of interest; the mocked responses carry no hits
+ "query" => [ "match_all" => []],
+ "aggs" => [
+ "datestats" => [
+ "nested"=> [
+ "path" => "creation_years"
+ ],
+ "aggs" => [
+ "years" => [
+ "terms"=> [
+ "field" => "creation_years.year",
+ "size" => 0, // NOTE(review): presumably "return every bucket" (Elasticsearch 1.x/2.x terms semantics) - confirm against the cluster version
+ "order" => [
+ "_term" => "asc" // buckets sorted by year, ascending
+ ]
+ ],
+ "aggs" => [
+ "year_count" => [
+ "sum" => [
+ "field" => "creation_years.weight" // per-year total of the fractional weights
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
+
public function setUp() {
parent::setup();
-
- // create a mock of the post repository interface and inject it into the
- // IoC container
- $this->sparqlClient = m::mock('CorpusParole\Libraries\Sparql\SparqlClient');
- $this->app->instance('CorpusParole\Libraries\Sparql\SparqlClient', $this->sparqlClient);
}
public function tearDown() {
@@ -26,243 +56,99 @@
public function testIndexQuery() {
- $query = preg_replace('/\s+/', ' ', "SELECT (?d as ?date) (COUNT(?d) AS ?count)
- WHERE {
- ?_ a <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
- ?_ <http://purl.org/dc/terms/created> ?d
- }
- GROUP BY ?d
- ORDER BY ?d");
-
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->with($query)
- ->once()
- ->andReturn(new \ArrayIterator([]));
- $this->get('/api/v1/stats/datestats/');
- $this->seeJsonEquals(["datestats" => []]);
- }
-
- public function testIndexMultiple() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('1975', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- (object)['date'=>new Literal('1965', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(2)],
- (object)['date'=>new Literal('1955', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(3)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 3,
- "1965" => 2,
- "1975" => 1,
- ]]);
- }
-
- public function testIndexSimple() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('1955', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- (object)['date'=>new Literal('1965', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- (object)['date'=>new Literal('1975', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 1,
- "1965" => 1,
- "1975" => 1,
- ]]);
- }
-
- public function testIndexPeriod() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955; end=1965', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(11)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 1,
- "1956" => 1,
- "1957" => 1,
- "1958" => 1,
- "1959" => 1,
- "1960" => 1,
- "1961" => 1,
- "1962" => 1,
- "1963" => 1,
- "1964" => 1,
- "1965" => 1,
- ]]);
- }
-
- public function testIndexPeriodRemainMore() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955; end=1965', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(15)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 2,
- "1956" => 2,
- "1957" => 2,
- "1958" => 2,
- "1959" => 1,
- "1960" => 1,
- "1961" => 1,
- "1962" => 1,
- "1963" => 1,
- "1964" => 1,
- "1965" => 1,
- ]]);
+ Es::shouldReceive('search') // expect exactly one search call carrying ES_QUERY; reply with a response that has no year buckets
+ ->once()
+ ->with(self::ES_QUERY)
+ ->andReturn(json_decode('{
+ "took" : 132,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 1,
+ "successful" : 1,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 3373,
+ "max_score" : 0.0,
+ "hits" : [ ]
+ },
+ "aggregations" : {
+ "datestats" : {
+ "doc_count" : 3725,
+ "years" : {
+ "doc_count_error_upper_bound" : 0,
+ "sum_other_doc_count" : 0,
+ "buckets" : []
+ }
+ }
+ }
+ }', true));
+ $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["datestats" => []]); // an empty bucket list must produce an empty datestats map
}
- public function testIndexPeriodRemainLess() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955; end=1965', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(10)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 1,
- "1956" => 1,
- "1957" => 1,
- "1958" => 1,
- "1959" => 1,
- "1960" => 1,
- "1961" => 1,
- "1962" => 1,
- "1963" => 1,
- "1964" => 1,
- ]]);
- }
-
- public function testIndexMix() {
+ public function testIndexResult() {
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955; end=1965', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(11)],
- (object)['date'=>new Literal('1960', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(2)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 1,
- "1956" => 1,
- "1957" => 1,
- "1958" => 1,
- "1959" => 1,
- "1960" => 3,
- "1961" => 1,
- "1962" => 1,
- "1963" => 1,
- "1964" => 1,
- "1965" => 1,
- ]]);
- }
-
- public function testIndexBadDate() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('1955', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- (object)['date'=>new Literal('HELLO', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- (object)['date'=>new Literal('1975', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 1,
- "1975" => 1,
- ]]);
+ Es::shouldReceive('search') // expect exactly one search call carrying ES_QUERY; reply with six year buckets
+ ->once()
+ ->with(self::ES_QUERY)
+ ->andReturn(json_decode('{
+ "took" : 132,
+ "timed_out" : false,
+ "_shards" : {
+ "total" : 1,
+ "successful" : 1,
+ "failed" : 0
+ },
+ "hits" : {
+ "total" : 3373,
+ "max_score" : 0.0,
+ "hits" : [ ]
+ },
+ "aggregations" : {
+ "datestats" : {
+ "doc_count" : 3725,
+ "years" : {
+ "doc_count_error_upper_bound" : 0,
+ "sum_other_doc_count" : 0,
+ "buckets" : [ {
+ "key" : 1948,
+ "doc_count" : 3,
+ "year_count" : { "value" : 3.0 }
+ }, {
+ "key" : 1957,
+ "doc_count" : 29,
+ "year_count" : { "value" : 29.0 }
+ }, {
+ "key" : 1963,
+ "doc_count" : 22,
+ "year_count" : { "value" : 21.5 }
+ }, {
+ "key" : 1970,
+ "doc_count" : 411,
+ "year_count" : { "value" : 403.68333334475756 }
+ }, {
+ "key" : 1986,
+ "doc_count" : 68,
+ "year_count" : { "value" : 14.133333388715982 }
+ }, {
+ "key" : 1996,
+ "doc_count" : 40,
+ "year_count" : { "value" : 36.05000001564622 }
+ } ]
+ }
+ }
+ }
+ }', true));
+ $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
+ $this->seeJsonEquals(["datestats" => [ // expected values are the year_count sums rounded to the nearest integer (21.5 => 22, 14.13 => 14), keyed by year
+ "1948" => 3,
+ "1957" => 29,
+ "1963" => 22,
+ "1970" => 404,
+ "1986" => 14,
+ "1996" => 36
+ ]]);
}
- public function testIndexBadPeriod() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955; end=FOO', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(11)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- ]]);
- }
-
- public function testIndexBadPeriodMissing() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(11)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- ]]);
- }
-
- public function testIndexFullPeriod() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('start=1955; end=1965; scheme=v3; name=v4;', null, "http://purl.org/dc/terms/Period"), 'count' => Literal::create(11)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1955" => 1,
- "1956" => 1,
- "1957" => 1,
- "1958" => 1,
- "1959" => 1,
- "1960" => 1,
- "1961" => 1,
- "1962" => 1,
- "1963" => 1,
- "1964" => 1,
- "1965" => 1,
- ]]);
- }
-
- public function testIndexMultipleFormat() {
-
- $this->sparqlClient
- ->shouldReceive('query')
- ->once()
- ->andReturn(new \ArrayIterator([
- (object)['date'=>new Literal('1975-02-05', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(1)],
- (object)['date'=>new Literal('1965-03', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(2)],
- (object)['date'=>new Literal('1955-02-12T08:30:00+00:00', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(3)],
- (object)['date'=>new Literal('1950-08-18T08:30:00Z', null, "http://purl.org/dc/terms/W3CDTF"), 'count' => Literal::create(4)],
- ]));
- $this->get('/api/v1/stats/datestats/')->assertTrue($this->response->isOk(), $this->response->content());
- $this->seeJsonEquals(["datestats" => [
- "1950" => 4,
- "1955" => 3,
- "1965" => 2,
- "1975" => 1,
- ]]);
- }
-
-
-
}