--- a/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 06 11:14:32 2016 +0200
+++ b/server/src/app/Console/Commands/IndexDocuments.php Thu Oct 06 16:45:37 2016 +0200
@@ -27,7 +27,7 @@
{--limit=0 : index only the first n documents, 0 (default) means index everything }
{--no-bulk : index documents one by one instead of using ElasticSearch bulk indexing }
{--step-size=100 : number of documents to retrieve from repository at a time before indexing}
- {--reset-geo-cache: reset geo cache befr indexing}';
+ {--reset-geo-cache : reset geo cache before indexing}';
/**
* The console command description.
@@ -61,12 +61,8 @@
*
* @return int (1 if sucess, 0 if error)
*/
- private function resetIndex($resetGeoCache)
+ private function resetIndex()
{
- if($resetGeoCache) {
- // delete all rows in GeonamesHierarchy
- GeonamesHierarchy::getQuery()->delete();
- }
$indexParams = [
'index' => env('ELASTICSEARCH_INDEX')
];
@@ -189,17 +185,30 @@
$mLexvo = [];
if($s instanceof Resource && preg_match(config('corpusparole.bnf_ark_url_regexp'), $s->getUri(), $mBnf) === 1) {
+
array_push($res, [
'uri' => $mBnf[0],
'code' => $mBnf[1],
'type' => 'bnf'
]);
- } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(). $mLexvo) === 1) {
+ } elseif($s instanceof Resource && preg_match(config('corpusparole.lexvo_url_regexp'), $s->getUri(), $mLexvo) === 1) {
array_push($res, [
'uri' => $mLexvo[0],
'code' => $mLexvo[1],
'type' => 'lxv'
]);
+ } elseif($s instanceof Literal && strpos($s->getDatatypeUri(), config('corpusparole.olac_base_url')) === 0 ) {
+ array_push($res, [
+ 'uri' => $s->getValue(),
+ 'code' => $s->getValue(),
+ 'type' => 'olac'
+ ]);
+ } elseif($s instanceof Literal) {
+ array_push($res, [
+ 'uri' => $s->getValue(),
+ 'code' => $s->getValue(),
+ 'type' => 'txt'
+ ]);
}
return $res;
@@ -216,7 +225,7 @@
},[]
))
);
- $labelsLexvo = $this->lexvoResolver->getLabels(
+ $labelsLexvo = $this->lexvoResolver->getNames(
array_unique(array_reduce(
$sres,
function($r, $so) {
@@ -311,9 +320,14 @@
$stepSize = $this->option('step-size');
$this->comment(' - Indexing with step size of '.$stepSize);
- $resetGeoCache = $this->option('reset-geo-cache', false);
+ if($this->option('reset-geo-cache', false)) {
+ // delete all rows in GeonamesHierarchy
+ GeonamesHierarchy::getQuery()->delete();
+ $this->comment('Geonames cache reset!');
+ }
+
$this->info('Resetting index...');
- $success = $this->resetIndex($resetGeoCache);
+ $success = $this->resetIndex();
if($success==1){
$this->comment('Index reset!');
}
--- a/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 06 11:14:32 2016 +0200
+++ b/server/src/app/Http/Controllers/Api/ThemeController.php Thu Oct 06 16:45:37 2016 +0200
@@ -41,16 +41,22 @@
$sort = $request->input('sort', 'count');
if($filter === 'bnf') {
- $include = ".*\|bnf\|.*";
+ $include = ".*\\|bnf\\|.*";
}
elseif($filter === 'lexvo') {
- $include = ".*\|lxv\|.*";
+ $include = ".*\\|lxv\\|.*";
+ }
+ elseif($filter === 'olac') {
+ $include = ".*\\|olac\\|.*";
+ }
+ elseif($filter === 'txt') {
+ $include = ".*\\|txt\\|.*";
}
elseif($filter === 'all') {
$include = false;
}
else {
- abort(401,"Value for filter parameter must be in 'bnf', 'lexvo', 'all'");
+ abort(401,"Value for filter parameter must be in 'bnf', 'lexvo', 'olac', 'txt', 'all'");
}
if($sort == "count" || $sort == "descending") {
@@ -99,12 +105,20 @@
$themes = [];
- $bucketList = array_slice($esRes['aggregations']['subjects']['subjects']['buckets'], $index*$limit, $limit);
+ $bucketList = $esRes['aggregations']['subjects']['subjects']['buckets'];
+ if($limit > 0) {
+ $bucketList = array_slice($bucketList, $index*$limit, $limit);
+ }
foreach($bucketList as $bucket) {
$parts = explode("|", $bucket['key']);
$label = $parts[0];
- $url = ['bnf' => config('corpusparole.bnf_ark_base_url'), 'lxv' => config('corpusparole.lexvo_base_url')][$parts[1]].$parts[2];
+ if($parts[1] === 'bnf' || $parts[1] === 'lxv') {
+ $url = ['bnf' => config('corpusparole.bnf_ark_base_url'), 'lxv' => config('corpusparole.lexvo_base_url')][$parts[1]].$parts[2];
+ }
+ else {
+ $url = $parts[2];
+ }
$themes[$url] = [
"label" => $label,
"count" => $bucket['doc_count']
--- a/server/src/config/corpusparole.php Thu Oct 06 11:14:32 2016 +0200
+++ b/server/src/config/corpusparole.php Thu Oct 06 16:45:37 2016 +0200
@@ -54,6 +54,8 @@
"http://ark.bnf.fr/ark:/12148/cb11949715t" => "réunions"
],
+ 'olac_base_url' => 'http://www.language-archives.org/OLAC',
+
'olac_discourse_type' => [
'uri' => 'http://www.language-archives.org/OLAC/1.1/discourse-type',
'values' => [
@@ -115,7 +117,7 @@
'theme_default_limit' => 40,
'lexvo_base_url' => 'http://lexvo.org/id/iso639-3/',
- 'lexvo_url_regexp' => '/http[s]?\:\/\/lexvo\.org\/iso639\-3\/([[:alpha:]]{3})\/?/',
+ 'lexvo_url_regexp' => '/http[s]?\:\/\/lexvo\.org\/id\/iso639\-3\/([[:alpha:]]{3})\/?/',
'lexvo_cache_expiration' => 60*24*30,
'lexvo_max_ids' => 50,
'lexvo_sesame_query_url' => $sesameBaseUrl.'repositories/'.env('CORPUSPAROLE_SESAME_LEXVO_REPOSITORY'),