# HG changeset patch # User ymh # Date 1475627485 -7200 # Node ID aefaad270b9bd6675a42c677a2543ab832575d0c # Parent 0fce13da58af73c98cbe18bb82ce296ddd555b1f reimplement ThemeController using ES requests to be able to sort by label diff -r 0fce13da58af -r aefaad270b9b cms/app-client/app/controllers/application.js --- a/cms/app-client/app/controllers/application.js Tue Oct 04 13:53:56 2016 +0200 +++ b/cms/app-client/app/controllers/application.js Wed Oct 05 02:31:25 2016 +0200 @@ -69,7 +69,7 @@ } }); if(!this.arraysEqual(this.get('date').toArray(), intervals)) { - this.set('date', intervals); + this.set('date', intervals); } }), @@ -91,14 +91,14 @@ return true; }, - + itemObserver: Ember.observer('player.item', function() { var self = this; this.store.findRecord('document', this.get('player').get('item'), { reload: true }).then(function(model){ self.get('player').set('model', model); if (self.get('player').get('model').get('transcript')) { - self.store.findRecord('transcript', encodeURIComponent(self.get('player').get('item'))).then(function(model) { + self.store.findRecord('transcript', self.get('player').get('item')).then(function(model) { self.get('player').set('transcript', model); }); } else { @@ -120,7 +120,7 @@ }), noticeModelObserver: Ember.observer('noticeModel', function() { if (!this.get('noticeModel')) { - this.set('notice', null); + this.set('notice', null); } }), diff -r 0fce13da58af -r aefaad270b9b server/src/app/Console/Commands/IndexDocuments.php --- a/server/src/app/Console/Commands/IndexDocuments.php Tue Oct 04 13:53:56 2016 +0200 +++ b/server/src/app/Console/Commands/IndexDocuments.php Wed Oct 05 02:31:25 2016 +0200 @@ -3,10 +3,14 @@ namespace CorpusParole\Console\Commands; use Illuminate\Console\Command; +use EasyRdf\Resource; + use GuzzleHttp\Client; +use CorpusParole\Libraries\Utils; use CorpusParole\Repositories\DocumentRepository; use CorpusParole\Libraries\CocoonUtils; use CorpusParole\Models\GeonamesHierarchy; +use CorpusParole\Services\BnfResolverInterface; use Es; class IndexDocuments extends Command @@ -34,9 +38,10 @@ * * @return void */ - public function __construct(DocumentRepository $documentRepository, Client $httpClient) + public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver) { $this->documentRepository = $documentRepository; + $this->bnfResolver = $bnfResolver; $this->httpClient = $httpClient; parent::__construct(); } @@ -82,7 +87,15 @@ ], 'date' => [ 'type' => 'date' ], 'geonames_hyerarchy' => [ 'type' => 'string' ], - 'location' => [ 'type' => 'geo_point' ] + 'location' => [ 'type' => 'geo_point' ], + 'subject' => [ + 'type' => 'nested', + 'properties' => [ + 'label' => [ 'type' => 'string', 'index' => 'not_analyzed'], + 'code' => [ 'type' => 'string', 'index' => 'not_analyzed'], + 'label_code' => [ 'type' => 'string', 'index' => 'not_analyzed'] + ] + ] // TODO: add location information ] ] @@ -154,6 +167,29 @@ } /** + * get subjects as { 'label': label, 'code': code } objects + * Takes only into account the bnf subjects + */ + private function getSubjects($doc) { + + $sres = array_reduce($doc->getSubjects(), function($res, $s) { + $m = []; + if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) { + array_push($res, [ + 'uri' => $m[0], + 'code' => $m[1] + ]); + } + return $res; + }, []); + + $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres))); + + return array_map(function($so) use ($labels) { return [ 'label' => $labels[$so['uri']], 'code' => $so['code'], 'label_code' => $labels[$so['uri']]."|".$so['code'] ]; }, $sres); + + } + + /** * Index one document into Elasticsearch * * @return int (1 if sucess, 0 if error) @@ -168,7 +204,8 @@ 'body' => [ 'title' => (string)$doc->getTitle(), 'date' => (string)$doc->getModified(), - 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc) + 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc), + 'subject' => $this->getSubjects($doc) ] ]; Es::index($query_data); @@ -182,7 +219,8 @@ private function indexBulk($docs) { $query_data = ['body' => []]; - foreach($docs as $doc){ + foreach($docs as $resultDoc){ + $doc = $this->documentRepository->get($resultDoc->getId()); $query_data['body'][] = [ 'index' => [ '_index' => config('elasticsearch.index'), @@ -192,7 +230,9 @@ ]; $query_data['body'][] = [ 'title' => (string)$doc->getTitle(), - 'date' => (string)$doc->getModified() + 'date' => (string)$doc->getModified(), + 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc), + 'subject' => $this->getSubjects($doc) ]; } Es::bulk($query_data); @@ -263,18 +303,18 @@ break; } $this->indexOne($doc); + $progressBar->setMessage($doc->getId()); $progressBar->advance(); - $progressBar->setMessage($doc->getId()); } } else { $this->indexBulk($docs); + $progressBar->setMessage('Page '.$page); $progressBar->advance(); - $progressBar->setMessage('Page '.$page); } } $progressBar->finish(); - $this->info('Indexing completed'); + $this->info("\nIndexing completed"); } } diff -r 0fce13da58af -r aefaad270b9b server/src/app/Exceptions/Handler.php --- a/server/src/app/Exceptions/Handler.php Tue Oct 04 13:53:56 2016 +0200 +++ b/server/src/app/Exceptions/Handler.php Wed Oct 05 02:31:25 2016 +0200 @@ -39,10 +39,17 @@ public function render($request, Exception $e) { if ( $request->isXmlHttpRequest() || $request->wantsJson() ) { + + $statusCode = 500; + if(is_callable([$e, 'getStatusCode'])) { + $statusCode = $e->getStatusCode(); + } + return response()->json([ + 'code' => $statusCode, 'message' => class_basename( $e ) . ' in ' . basename( $e->getFile() ) . ' line ' . $e->getLine() . ( ( $message = $e->getMessage() ) ? ': ' . $e->getMessage() : '.' ), 'trace' => $e->getTrace() - ], 500); + ], $statusCode); } return parent::render($request, $e); } diff -r 0fce13da58af -r aefaad270b9b server/src/app/Http/Controllers/Api/ThemeController.php --- a/server/src/app/Http/Controllers/Api/ThemeController.php Tue Oct 04 13:53:56 2016 +0200 +++ b/server/src/app/Http/Controllers/Api/ThemeController.php Wed Oct 05 02:31:25 2016 +0200 @@ -10,6 +10,9 @@ use EasyRdf\Resource; use EasyRdf\Literal; +use Es; +use Log; + use CorpusParole\Libraries\Sparql\SparqlClient; use CorpusParole\Services\LexvoResolverInterface; use CorpusParole\Services\BnfResolverInterface; @@ -32,80 +35,65 @@ */ public function index(Request $request) { - $filter = $request->input('filter', 'bnf'); - $resolve = filter_var($request->input('resolve', true), FILTER_VALIDATE_BOOLEAN); + $index = $request->input('index', 0); + $limit = $request->input('limit', 0); + $sort = $request->input('sort', 'count'); - $filterClause = ""; - if($filter === 'bnf') { - $filterClause = "FILTER (isIRI(?o) && regex(str(?o), '^".config('corpusparole.bnf_ark_base_url')."')) ."; - } - elseif($filter === 'uri') { - $filterClause = "FILTER isIRI(?o) ."; - } - elseif($filter === 'all' || $filter === 'none' || $filter === '') { - $filterClause = ""; - } - else { - abort(401,"Value for filter parameter must be in 'bnf', 'uri', 'all' or 'none'"); + if($sort == "count" || $sort == "descending") { + $order_field = "_count"; + $order_dir = "desc"; + } elseif($sort == "-count") { + $order_field = "_count"; + $order_dir = "asc"; + } elseif ($sort == "label" || $sort == "alphabetical") { + $order_field = "_term"; + $order_dir = "asc"; + } elseif ($sort == "-label") { + $order_field = "_term"; + $order_dir = "desc"; + } else { + $order_field = "_count"; + $order_dir = "desc"; } - $query = preg_replace('/\s+/', ' ', - "select (?o as ?theme) (COUNT(?s) as ?count) where { - ?s a . - ?s ?o . - $filterClause - } - GROUP BY ?o - ORDER BY DESC(?count)"); + $query = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'body' => [ + 'size' => 0, + 'aggs' => [ + "subjects" => [ + "nested" => [ "path" => "subject" ], + "aggs" => [ + "subjects" => [ + "terms" => [ + "field" => "subject.label_code", + "size" => $limit * ($index+1), + "order" => [ $order_field => $order_dir ] + ] + ] + ] + ] + ] + ] + ]; - $docs = $this->sparqlClient->query($query); + $esRes = Es::search($query); $themes = []; - $labels = []; - $bnfUris = []; - $lexvoUris = []; + $bucketList = array_slice($esRes['aggregations']['subjects']['subjects']['buckets'], $index*$limit, $limit); - foreach ($docs as $row) { - $key = ""; - $label = null; - if($row->theme instanceof Resource) { - $key = $row->theme->getUri(); - if($resolve && strpos($key, config('corpusparole.bnf_ark_base_url')) === 0) { - array_push($bnfUris, $key); - } - elseif($resolve && strpos($key, config('corpusparole.lexvo_base_url')) === 0) { - array_push($lexvoUris, $key); - } - $label = null; - } - elseif($row->theme instanceof Literal) { - $key = $row->theme->getValue(); - $label = $row->theme->getValue(); - } - - $themes[$key] = [ + foreach($bucketList as $bucket) { + $parts = explode("|", $bucket['key']); + $label = $parts[0]; + $url = config('corpusparole.bnf_ark_base_url').$parts[1]; + $themes[$url] = [ "label" => $label, - "count" => $row->count->getValue() + "count" => $bucket['doc_count'] ]; } - if($resolve) { - if(count($lexvoUris) > 0) { - $labels = $this->lexvoResolver->getNames($lexvoUris); - } - if(count($bnfUris) > 0) { - $labels = array_merge($labels, $this->bnfResolver->getLabels($bnfUris)); - } - foreach ($themes as $themeKey => $themeDef) { - if(array_key_exists($themeKey, $labels)) { - $themeDef['label'] = $labels[$themeKey]; - } - $themes[$themeKey] = $themeDef; - } - } - return response()->json(['themes' => $themes ]); } diff -r 0fce13da58af -r aefaad270b9b server/src/config/corpusparole.php --- a/server/src/config/corpusparole.php Tue Oct 04 13:53:56 2016 +0200 +++ b/server/src/config/corpusparole.php Wed Oct 05 02:31:25 2016 +0200 @@ -119,6 +119,7 @@ 'bnf_base_url' => 'http://data.bnf.fr/', 'bnf_ark_base_url' => 'http://ark.bnf.fr/', + 'bnf_ark_url_regexp' => '/http[s]?\:\/\/(?:data|ark)\.bnf\.fr\/(ark\:\/12148\/[[:alnum:]]+)\/?/', 'bnf_cache_expiration' => 60*24*30, 'bnf_max_ids' => 5, 'bnf_query_url' => 'http://data.bnf.fr/sparql',