server/src/app/Http/Controllers/Api/DateStatsController.php
changeset 375 145561ff51ff
parent 307 07b44a378ad8
child 377 52169c718513
equal deleted inserted replaced
374:c622fa18eb32 375:145561ff51ff
     3 namespace CorpusParole\Http\Controllers\Api;
     3 namespace CorpusParole\Http\Controllers\Api;
     4 
     4 
     5 // use CorpusParole\Http\Requests;
     5 // use CorpusParole\Http\Requests;
     6 use Illuminate\Http\Request;
     6 use Illuminate\Http\Request;
     7 use Log;
     7 use Log;
       
     8 use Es;
     8 
     9 
     9 use CorpusParole\Libraries\Sparql\SparqlClient;
    10 use CorpusParole\Libraries\Sparql\SparqlClient;
    10 
    11 
    11 use CorpusParole\Http\Controllers\Controller;
    12 use CorpusParole\Http\Controllers\Controller;
    12 
    13 
    24      *
    25      *
    25      * @return \Illuminate\Http\Response
    26      * @return \Illuminate\Http\Response
    26      */
    27      */
    public function index(Request $request)
    {
        // Builds the per-year document statistics from a nested Elasticsearch
        // aggregation over "creation_years" and returns them as JSON under
        // the "datestats" key (year => rounded sum of weights).
        // $request is part of the route signature and is not read here.

        // An empty PHP array is serialised as the JSON array "[]"; match_all
        // expects an (empty) JSON object, so an explicit stdClass is used.
        $query = [ "match_all" => new \stdClass() ];

        // NOTE(review): env() is read at request time; presumably this should
        // come from a config file if configuration caching is used - confirm.
        $esQuery = [
            'index' => env('ELASTICSEARCH_INDEX'),
            'body' => [
                // no hits are needed, only the aggregation result
                "size" => 0,
                "query" => $query,
                "aggs" => [
                    "datestats" => [
                        "nested"=> [
                            "path" => "creation_years"
                        ],
                        "aggs" => [
                            "years" => [
                                "terms"=> [
                                    "field" => "creation_years.year",
                                    "size" => 0,
                                    "order" => [
                                        "_term" => "asc"
                                    ]
                                ],
                                "aggs" => [
                                    "year_count" => [
                                        "sum" => [
                                            "field" => "creation_years.weight"
                                        ]
                                    ]
                                ]
                            ]
                        ]
                    ]
                ]
            ]
        ];
        $esRes = Es::search($esQuery);

        // Guard against a response without the expected aggregation instead
        // of failing on an undefined index.
        $buckets = [];
        if(isset($esRes['aggregations']['datestats']['years']['buckets'])
            && is_array($esRes['aggregations']['datestats']['years']['buckets'])) {
            $buckets = $esRes['aggregations']['datestats']['years']['buckets'];
        }

        $datestats = [];
        foreach($buckets as $bucket) {
            if(!isset($bucket['key']) || !isset($bucket['year_count']['value'])) {
                continue;
            }
            // round() returns a float; cast so counts are emitted as integers.
            $datestats[(string)($bucket['key'])] = (int)round($bucket['year_count']['value']);
        }

        return response()->json(['datestats' => $datestats ]);
    }
    68 
    75 
    /**
     * Extract the year from a date string.
     *
     * A bare four-digit value is expanded to "YYYY-1-1" before being handed
     * to date_create(); any other non-empty value is parsed as is.
     *
     * @param string $dateStr the date expression to parse
     * @return string|false the year formatted with "Y", or false when the
     *                      value is empty or cannot be parsed (a warning is
     *                      logged in that case)
     */
    private function extractYear($dateStr) {
        // Surrounding whitespace would defeat the year-only pattern below.
        $dateStr = trim((string)$dateStr);

        $date = false;
        // date_create('') yields the current date, which would silently
        // report the current year for a missing value: treat it as invalid.
        if($dateStr !== '') {
            if(preg_match("/^\\d{4}$/", $dateStr) === 1) {
                $dateStr = "$dateStr-1-1";
            }
            $date = date_create($dateStr);
        }

        if($date === false) {
            Log::warning("DateStatsController:extractYear bad format for date $dateStr");
            return false;
        }

        return $date->format("Y");
    }
       
    79 
       
    /**
     * Spread the count of a period over the years it covers.
     *
     * The period string is split on ";" and the "start=" and "end=" elements
     * are read; other elements are ignored. The count is divided evenly
     * between all years from start to end (inclusive); the remainder of the
     * division is given, one unit each, to the first years of the range.
     * Years that receive nothing are left out of the result.
     *
     * @param string $periodStr the period expression ("start=...; end=...")
     * @param int $count the number of documents attached to this period
     * @return array year (as array key) => count; empty when a bound is
     *               missing, unparseable, or when start is after end
     */
    private function processPeriod($periodStr, $count) {
        $start = null;
        $end = null;
        foreach(explode(";", $periodStr) as $elem) {
            $elem = trim($elem);
            if(strpos($elem, 'start=') === 0) {
                // Test for failure BEFORE converting to int: intval(false)
                // is 0, which would otherwise be taken as a valid year.
                $year = $this->extractYear(trim((string)substr($elem, 6)));
                if($year === false) {
                    return [];
                }
                $start = intval($year);
            } elseif(strpos($elem, 'end=') === 0) {
                $year = $this->extractYear(trim((string)substr($elem, 4)));
                if($year === false) {
                    return [];
                }
                $end = intval($year);
            }
        }

        if(is_null($start) || is_null($end) || $start>$end ) {
            // TODO: log problem
            return [];
        }

        $res = [];
        // number of years covered, both bounds included (always >= 1 here)
        $span = $end + 1 - $start;
        $mean = (int)($count/$span);
        $remains = $count % $span;
        for($d=$start; $d<=$end; $d++) {
            // the first $remains years get one extra unit
            $nb = $mean + ((($remains--)>0)?1:0);
            if($nb !== 0) {
                $res[strval($d)] = $nb;
            }
        }

        return $res;
    }
       
   115 
       
   116     private function processDate($dateStr, $count) {
       
   117         $date = $this->extractYear($dateStr);
       
   118         if($date === false)  {
       
   119             return [];
       
   120         } else {
       
   121             return [ $this->extractYear($dateStr) => $count ];
       
   122         }
       
   123     }
       
   124 
       
   125 
       
   126 }
    76 }