--- a/cms/app-client/app/controllers/application.js Tue Oct 04 13:53:56 2016 +0200
+++ b/cms/app-client/app/controllers/application.js Wed Oct 05 02:31:25 2016 +0200
@@ -69,7 +69,7 @@
}
});
if(!this.arraysEqual(this.get('date').toArray(), intervals)) {
- this.set('date', intervals);
+ this.set('date', intervals);
}
}),
@@ -91,14 +91,14 @@
return true;
},
-
+
itemObserver: Ember.observer('player.item', function() {
var self = this;
this.store.findRecord('document', this.get('player').get('item'), { reload: true }).then(function(model){
self.get('player').set('model', model);
if (self.get('player').get('model').get('transcript')) {
- self.store.findRecord('transcript', encodeURIComponent(self.get('player').get('item'))).then(function(model) {
+ self.store.findRecord('transcript', self.get('player').get('item')).then(function(model) {
self.get('player').set('transcript', model);
});
} else {
@@ -120,7 +120,7 @@
}),
noticeModelObserver: Ember.observer('noticeModel', function() {
if (!this.get('noticeModel')) {
- this.set('notice', null);
+ this.set('notice', null); // clear the notice whenever noticeModel is falsy
}
}),
--- a/server/src/app/Console/Commands/IndexDocuments.php Tue Oct 04 13:53:56 2016 +0200
+++ b/server/src/app/Console/Commands/IndexDocuments.php Wed Oct 05 02:31:25 2016 +0200
@@ -3,10 +3,14 @@
namespace CorpusParole\Console\Commands;
use Illuminate\Console\Command;
+use EasyRdf\Resource;
+
use GuzzleHttp\Client;
+use CorpusParole\Libraries\Utils;
use CorpusParole\Repositories\DocumentRepository;
use CorpusParole\Libraries\CocoonUtils;
use CorpusParole\Models\GeonamesHierarchy;
+use CorpusParole\Services\BnfResolverInterface;
use Es;
class IndexDocuments extends Command
@@ -34,9 +38,10 @@
*
* @return void
*/
- public function __construct(DocumentRepository $documentRepository, Client $httpClient)
+ public function __construct(DocumentRepository $documentRepository, Client $httpClient, BnfResolverInterface $bnfResolver)
{
$this->documentRepository = $documentRepository;
+ $this->bnfResolver = $bnfResolver; // used by getSubjects() to fetch the labels of the subjects
$this->httpClient = $httpClient;
parent::__construct();
}
@@ -82,7 +87,15 @@
],
'date' => [ 'type' => 'date' ],
'geonames_hyerarchy' => [ 'type' => 'string' ],
- 'location' => [ 'type' => 'geo_point' ]
+ 'location' => [ 'type' => 'geo_point' ],
+ 'subject' => [
+ 'type' => 'nested',
+ 'properties' => [
+ 'label' => [ 'type' => 'string', 'index' => 'not_analyzed'],
+ 'code' => [ 'type' => 'string', 'index' => 'not_analyzed'],
+ 'label_code' => [ 'type' => 'string', 'index' => 'not_analyzed']
+ ]
+ ]
// TODO: add location information
]
]
@@ -154,6 +167,29 @@
}
/**
+ * Get the BnF subjects of a document as [ 'label' => ..., 'code' => ..., 'label_code' => "label|code" ] arrays.
+ * Only subjects that are resources whose URI matches config('corpusparole.bnf_ark_url_regexp') are kept; 'code' is the group captured by that regexp.
+ */
+ private function getSubjects($doc) {
+ $sres = array_reduce($doc->getSubjects(), function($res, $s) {
+ $m = [];
+ if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $m) === 1) {
+ array_push($res, [
+ 'uri' => $m[0],
+ 'code' => $m[1]
+ ]);
+ }
+ return $res;
+ }, []);
+ if(empty($sres)) { return []; } // no BnF subject: nothing to resolve, skip the resolver call
+ $labels = $this->bnfResolver->getLabels(array_unique(array_map(function($so) { return $so['uri'];}, $sres)));
+ return array_map(function($so) use ($labels) {
+ $label = isset($labels[$so['uri']]) ? $labels[$so['uri']] : null; // the resolver may not return a label for every uri: index null instead of failing on an undefined index
+ return [ 'label' => $label, 'code' => $so['code'], 'label_code' => $label."|".$so['code'] ];
+ }, $sres);
+ }
+
+ /**
* Index one document into Elasticsearch
*
* @return int (1 if sucess, 0 if error)
@@ -168,7 +204,8 @@
'body' => [
'title' => (string)$doc->getTitle(),
'date' => (string)$doc->getModified(),
- 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc)
+ 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
+ 'subject' => $this->getSubjects($doc)
]
];
Es::index($query_data);
@@ -182,7 +219,8 @@
private function indexBulk($docs)
{
$query_data = ['body' => []];
- foreach($docs as $doc){
+ foreach($docs as $resultDoc){
+ $doc = $this->documentRepository->get($resultDoc->getId());
$query_data['body'][] = [
'index' => [
'_index' => config('elasticsearch.index'),
@@ -192,7 +230,9 @@
];
$query_data['body'][] = [
'title' => (string)$doc->getTitle(),
- 'date' => (string)$doc->getModified()
+ 'date' => (string)$doc->getModified(),
+ 'geonames_hierarchy' => $this->getGeonamesHierarchy($doc),
+ 'subject' => $this->getSubjects($doc)
];
}
Es::bulk($query_data);
@@ -263,18 +303,18 @@
break;
}
$this->indexOne($doc);
+ $progressBar->setMessage($doc->getId());
$progressBar->advance();
- $progressBar->setMessage($doc->getId());
}
}
else
{
$this->indexBulk($docs);
+ $progressBar->setMessage('Page '.$page);
$progressBar->advance();
- $progressBar->setMessage('Page '.$page);
}
}
$progressBar->finish();
- $this->info('Indexing completed');
+ $this->info("\nIndexing completed");
}
}
--- a/server/src/app/Exceptions/Handler.php Tue Oct 04 13:53:56 2016 +0200
+++ b/server/src/app/Exceptions/Handler.php Wed Oct 05 02:31:25 2016 +0200
@@ -39,10 +39,17 @@
public function render($request, Exception $e)
{
if ( $request->isXmlHttpRequest() || $request->wantsJson() ) {
+ // Answer with the exception's own status code when it exposes getStatusCode(), otherwise with 500.
+ $statusCode = 500;
+ if(is_callable([$e, 'getStatusCode'])) {
+ $statusCode = $e->getStatusCode();
+ }
+
return response()->json([
+ 'code' => $statusCode,
'message' => class_basename( $e ) . ' in ' . basename( $e->getFile() ) . ' line ' . $e->getLine() . ( ( $message = $e->getMessage() ) ? ': ' . $e->getMessage() : '.' ),
'trace' => $e->getTrace()
- ], 500);
+ ], $statusCode);
}
return parent::render($request, $e);
}
--- a/server/src/app/Http/Controllers/Api/ThemeController.php Tue Oct 04 13:53:56 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/ThemeController.php Wed Oct 05 02:31:25 2016 +0200
@@ -10,6 +10,9 @@
use EasyRdf\Resource;
use EasyRdf\Literal;
+use Es;
+use Log;
+
use CorpusParole\Libraries\Sparql\SparqlClient;
use CorpusParole\Services\LexvoResolverInterface;
use CorpusParole\Services\BnfResolverInterface;
@@ -32,80 +35,65 @@
*/
public function index(Request $request)
{
- $filter = $request->input('filter', 'bnf');
- $resolve = filter_var($request->input('resolve', true), FILTER_VALIDATE_BOOLEAN);
+ $index = max(0, (int)$request->input('index', 0)); // zero-based page number
+ $limit = max(0, (int)$request->input('limit', 0)); // page size, 0 means "no limit"
+ $sort = $request->input('sort', 'count'); // count, -count, label, -label (aliases: descending, alphabetical)
- $filterClause = "";
- if($filter === 'bnf') {
- $filterClause = "FILTER (isIRI(?o) && regex(str(?o), '^".config('corpusparole.bnf_ark_base_url')."')) .";
- }
- elseif($filter === 'uri') {
- $filterClause = "FILTER isIRI(?o) .";
- }
- elseif($filter === 'all' || $filter === 'none' || $filter === '') {
- $filterClause = "";
- }
- else {
- abort(401,"Value for filter parameter must be in 'bnf', 'uri', 'all' or 'none'");
+ if($sort == "count" || $sort == "descending") {
+ $order_field = "_count";
+ $order_dir = "desc";
+ } elseif($sort == "-count") {
+ $order_field = "_count";
+ $order_dir = "asc";
+ } elseif ($sort == "label" || $sort == "alphabetical") {
+ $order_field = "_term";
+ $order_dir = "asc";
+ } elseif ($sort == "-label") {
+ $order_field = "_term";
+ $order_dir = "desc";
+ } else {
+ $order_field = "_count"; // unknown sort value: same as the default "count"
+ $order_dir = "desc";
}
- $query = preg_replace('/\s+/', ' ',
- "select (?o as ?theme) (COUNT(?s) as ?count) where {
- ?s a <http://www.europeana.eu/schemas/edm/ProvidedCHO> .
- ?s <http://purl.org/dc/elements/1.1/subject> ?o .
- $filterClause
- }
- GROUP BY ?o
- ORDER BY DESC(?count)");
+ $query = [
+ 'index' => config('elasticsearch.index'), // same setting the indexing command writes to; env() is not reliable once the configuration is cached
+ 'body' => [
+ 'size' => 0,
+ 'aggs' => [
+ "subjects" => [
+ "nested" => [ "path" => "subject" ],
+ "aggs" => [
+ "subjects" => [
+ "terms" => [
+ "field" => "subject.label_code",
+ "size" => $limit * ($index+1), // enough buckets to reach the requested page; 0 when there is no limit
+ "order" => [ $order_field => $order_dir ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ]
+ ];
- $docs = $this->sparqlClient->query($query);
+ $esRes = Es::search($query);
$themes = [];
- $labels = [];
- $bnfUris = [];
- $lexvoUris = [];
+ $bucketList = array_slice($esRes['aggregations']['subjects']['subjects']['buckets'], $index*$limit, $limit > 0 ? $limit : null); // a length of 0 would return nothing, null keeps every bucket
- foreach ($docs as $row) {
- $key = "";
- $label = null;
- if($row->theme instanceof Resource) {
- $key = $row->theme->getUri();
- if($resolve && strpos($key, config('corpusparole.bnf_ark_base_url')) === 0) {
- array_push($bnfUris, $key);
- }
- elseif($resolve && strpos($key, config('corpusparole.lexvo_base_url')) === 0) {
- array_push($lexvoUris, $key);
- }
- $label = null;
- }
- elseif($row->theme instanceof Literal) {
- $key = $row->theme->getValue();
- $label = $row->theme->getValue();
- }
-
- $themes[$key] = [
+ foreach($bucketList as $bucket) {
+ $sep = strrpos($bucket['key'], "|"); // key is "label|code": split on the last "|" since only the label may contain one
+ $label = substr($bucket['key'], 0, $sep);
+ $url = config('corpusparole.bnf_ark_base_url').substr($bucket['key'], $sep + 1);
+ $themes[$url] = [
"label" => $label,
- "count" => $row->count->getValue()
+ "count" => $bucket['doc_count']
];
}
- if($resolve) {
- if(count($lexvoUris) > 0) {
- $labels = $this->lexvoResolver->getNames($lexvoUris);
- }
- if(count($bnfUris) > 0) {
- $labels = array_merge($labels, $this->bnfResolver->getLabels($bnfUris));
- }
- foreach ($themes as $themeKey => $themeDef) {
- if(array_key_exists($themeKey, $labels)) {
- $themeDef['label'] = $labels[$themeKey];
- }
- $themes[$themeKey] = $themeDef;
- }
- }
-
return response()->json(['themes' => $themes ]);
}
--- a/server/src/config/corpusparole.php Tue Oct 04 13:53:56 2016 +0200
+++ b/server/src/config/corpusparole.php Wed Oct 05 02:31:25 2016 +0200
@@ -119,6 +119,7 @@
'bnf_base_url' => 'http://data.bnf.fr/',
'bnf_ark_base_url' => 'http://ark.bnf.fr/',
+ 'bnf_ark_url_regexp' => '/http[s]?\:\/\/(?:data|ark)\.bnf\.fr\/(ark\:\/12148\/[[:alnum:]]+)\/?/',
'bnf_cache_expiration' => 60*24*30,
'bnf_max_ids' => 5,
'bnf_query_url' => 'http://data.bnf.fr/sparql',