# HG changeset patch # User durandn # Date 1450172400 -3600 # Node ID de47e8f66e8b0d1d7bb74e3358fd918f815ec915 # Parent 037687868bc4ceffac153dea7a531baf85927049 Added command "corpus-parole:indexDocuments" to index documents into ElasticSearch diff -r 037687868bc4 -r de47e8f66e8b server/src/.env.example --- a/server/src/.env.example Thu Dec 10 16:05:53 2015 +0100 +++ b/server/src/.env.example Tue Dec 15 10:40:00 2015 +0100 @@ -25,3 +25,9 @@ CORPUSPAROLE_SESAME_UPDATE_URL= EASYRDF_HTTP_CLIENT_TIMEOUT=20 + +ELASTICSEARCH_URL= +ELASTICSEARCH_LOG_PATH='/logs/elasticsearch.log' +ELASTICSEARCH_INDEX='corpus' +ELASTICSEARCH_SHARDS=5 +ELASTICSEARCH_REPLICAS=1 \ No newline at end of file diff -r 037687868bc4 -r de47e8f66e8b server/src/app/Console/Commands/IndexDocuments.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/src/app/Console/Commands/IndexDocuments.php Tue Dec 15 10:40:00 2015 +0100 @@ -0,0 +1,214 @@ +documentRepository = $documentRepository; + parent::__construct(); + } + + + + /** + * Reset Elasticsearch index + * + * @return int (1 if sucess, 0 if error) + */ + private function resetIndex() + { + $indexParams = [ + 'index' => env('ELASTICSEARCH_INDEX') + ]; + if(Es::indices()->exists($indexParams)){ + $response = Es::indices()->delete($indexParams); + if($response['acknowledged']!=1){ + return 0; + } + } + $indexParams['body'] = [ + 'settings' => [ + 'number_of_shards' => env('ELASTICSEARCH_SHARDS'), + 'number_of_replicas' => env('ELASTICSEARCH_REPLICAS'), + 'index.mapping.ignore_malformed' => True + ], + 'mappings' => [ + 'document' => [ + 'properties' => [ + 'title' => [ + 'type' => 'string', + 'store' => True, + 'fields' => [ + 'raw' => [ + 'type' => 'string', + 'index' => 'not_analyzed' + ] + ] + ], + 'date' => [ + 'type' => 'date', + 'store' => True + ] + ] + ] + ] + ]; + $response = Es::indices()->create($indexParams); + if($response['acknowledged']!=1){ + return 0; + } + return 1; + } + + /** + * Index one document into Elasticsearch + * + * @return int (1 if sucess, 0 if error) + */ + private function indexOne($doc) + { + $query_data = [ + 'index' => env('ELASTICSEARCH_INDEX'), + 'type' => 'document', + 'id' => (string)$doc->getId(), + 'body' => [ + 'title' => (string)$doc->getTitle(), + 'date' => (string)$doc->getModified() + ] + ]; + Es::index($query_data); + } + + /** + * Index multiple document into Elasticsearch + * + * @return int (1 if sucess, 0 if error) + */ + private function indexBulk($docs) + { + $query_data = ['body' => []]; + foreach($docs as $doc){ + $query_data['body'][] = [ + 'index' => [ + '_index' => env('ELASTICSEARCH_INDEX'), + '_type' => 'document', + '_id' => (string)$doc->getId() + ] + ]; + $query_data['body'][] = [ + 'title' => (string)$doc->getTitle(), + 'date' => (string)$doc->getModified() + ]; + } + Es::bulk($query_data); + } + /** + * Execute the console command. + * + * @return mixed + */ + public function handle() + { + $this->info('Options:'); + $noBulk = $this->option('no-bulk'); + if ($noBulk) + { + $this->comment(' - Indexing without bulk insert'); + } + else + { + $this->comment(' - Indexing using bulk insert'); + } + $limit = $this->option('limit'); + if ($limit>0) { + $this->comment(' - Indexing only the first '.$limit.' documents'); + } + $stepSize = $this->option('step-size'); + $this->comment(' - Indexing with step size of '.$stepSize); + + $this->info('Resetting index...'); + $success = $this->resetIndex(); + if($success==1){ + $this->comment('Index reset!'); + } + else{ + $this->error('Error resetting index ' . env('ELASTICSEARCH_INDEX')); + } + + $this->info('Indexing documents...'); + + if ($limit<=0) { + $lastPage = $this->documentRepository->paginateAll($stepSize, 'page')->lastPage(); + $total = $this->documentRepository->getCount(); + $lastPageEntryCount = $stepSize+1; + } + else { + $lastPage = min((int)($limit/$stepSize)+1, $this->documentRepository->paginateAll($stepSize, 'page')->lastPage()); + $total = $limit; + $lastPageEntryCount = $limit % $stepSize; + } + + if ($noBulk) + { + $progressBar = $this->output->createProgressBar($total); + } + else + { + $progressBar = $this->output->createProgressBar($lastPage); + } + $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%'); + + for ($page=1;$page<=$lastPage;$page++) + { + $docs = $this->documentRepository->paginateAll($stepSize, 'page', $page); + if ($noBulk) + { + foreach ($docs as $i=>$doc){ + if ($page==$lastPage && $i>=$lastPageEntryCount){ + break; + } + $this->indexOne($doc); + $progressBar->advance(); + $progressBar->setMessage($doc->getId()); + } + } + else + { + $this->indexBulk($docs); + $progressBar->advance(); + $progressBar->setMessage('Page '.$page); + } + } + $progressBar->finish(); + $this->info('Indexing completed'); + } +} diff -r 037687868bc4 -r de47e8f66e8b server/src/app/Console/Kernel.php --- a/server/src/app/Console/Kernel.php Thu Dec 10 16:05:53 2015 +0100 +++ b/server/src/app/Console/Kernel.php Tue Dec 15 10:40:00 2015 +0100 @@ -15,6 +15,7 @@ protected $commands = [ 'CorpusParole\Console\Commands\Inspire', 'CorpusParole\Console\Commands\ImportCocoonRDF', + 'CorpusParole\Console\Commands\IndexDocuments', ]; /** diff -r 037687868bc4 -r de47e8f66e8b server/src/composer.json --- a/server/src/composer.json Thu Dec 10 16:05:53 2015 +0100 +++ b/server/src/composer.json Tue Dec 15 10:40:00 2015 +0100 @@ -10,7 +10,8 @@ "ml/json-ld": "*", "caseyamcl/phpoaipmh": "~2.4", "guzzlehttp/guzzle": "~6.0", - "laravelcollective/html": "5.1.*" + "laravelcollective/html": "5.1.*", + "shift31/laravel-elasticsearch": "~1.0" }, "require-dev": { "phpunit/phpunit": "~5.0", diff -r 037687868bc4 -r de47e8f66e8b server/src/composer.lock --- a/server/src/composer.lock Thu Dec 10 16:05:53 2015 +0100 +++ b/server/src/composer.lock Tue Dec 15 10:40:00 2015 +0100 @@ -4,8 +4,8 @@ "Read more about it at https://getcomposer.org/doc/01-basic-usage.md#composer-lock-the-lock-file", "This file is @generated automatically" ], - "hash": "814fed9487bfb758c37b3f322650ade6", - "content-hash": "80322b0e38b8cc26e4ef4caa3a13894f", + "hash": "e7a4d2a203d50a598d3a7c5b4c3605b4", + "content-hash": "aa3d65578938af3883cada1736fa5d64", "packages": [ { "name": "caseyamcl/phpoaipmh", @@ -286,7 +286,7 @@ }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/njh/easyrdf/zipball/02864f8996311ff7c6619be70fc1ecf6f3165c48", + "url": "https://api.github.com/repos/njh/easyrdf/zipball/1371c8af2abd3a948c50962b2212fce1912000e7", "reference": "02864f8996311ff7c6619be70fc1ecf6f3165c48", "shasum": "" }, @@ -344,6 +344,156 @@ "time": "2015-10-07 11:58:54" }, { + "name": "elasticsearch/elasticsearch", + "version": "v1.4.1", + "source": { + "type": "git", + "url": "https://github.com/elastic/elasticsearch-php.git", + "reference": "3a5573cf3223d5646a76a5f8ce938048ca340680" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/elastic/elasticsearch-php/zipball/3a5573cf3223d5646a76a5f8ce938048ca340680", + "reference": "3a5573cf3223d5646a76a5f8ce938048ca340680", + "shasum": "" + }, + "require": { + "ext-curl": "*", + "guzzle/guzzle": "~3.0", + "monolog/monolog": "~1.11", + "php": ">=5.3.9", + "pimple/pimple": "~3.0", + "psr/log": "~1.0" + }, + "require-dev": { + "athletic/athletic": "~0.1", + "cpliakas/git-wrapper": "~1.0", + "mikey179/vfsstream": "~1.2", + "mockery/mockery": "0.9.4", + "phpunit/phpunit": "3.7.*", + "satooshi/php-coveralls": "dev-master", + "symfony/yaml": "2.4.3 as 2.4.2", + "twig/twig": "1.*" + }, + "type": "library", + "autoload": { + "psr-4": { + "Elasticsearch\\": "src/Elasticsearch/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "Apache 2" + ], + "authors": [ + { + "name": "Zachary Tong" + } + ], + "description": "PHP Client for Elasticsearch", + "keywords": [ + "client", + "elasticsearch", + "search" + ], + "time": "2015-09-17 22:01:44" + }, + { + "name": "guzzle/guzzle", + "version": "v3.9.3", + "source": { + "type": "git", + "url": "https://github.com/guzzle/guzzle3.git", + "reference": "0645b70d953bc1c067bbc8d5bc53194706b628d9" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/guzzle/guzzle3/zipball/0645b70d953bc1c067bbc8d5bc53194706b628d9", + "reference": "0645b70d953bc1c067bbc8d5bc53194706b628d9", + "shasum": "" + }, + "require": { + "ext-curl": "*", + "php": ">=5.3.3", + "symfony/event-dispatcher": "~2.1" + }, + "replace": { + "guzzle/batch": "self.version", + "guzzle/cache": "self.version", + "guzzle/common": "self.version", + "guzzle/http": "self.version", + "guzzle/inflection": "self.version", + "guzzle/iterator": "self.version", + "guzzle/log": "self.version", + "guzzle/parser": "self.version", + "guzzle/plugin": "self.version", + "guzzle/plugin-async": "self.version", + "guzzle/plugin-backoff": "self.version", + "guzzle/plugin-cache": "self.version", + "guzzle/plugin-cookie": "self.version", + "guzzle/plugin-curlauth": "self.version", + "guzzle/plugin-error-response": "self.version", + "guzzle/plugin-history": "self.version", + "guzzle/plugin-log": "self.version", + "guzzle/plugin-md5": "self.version", + "guzzle/plugin-mock": "self.version", + "guzzle/plugin-oauth": "self.version", + "guzzle/service": "self.version", + "guzzle/stream": "self.version" + }, + "require-dev": { + "doctrine/cache": "~1.3", + "monolog/monolog": "~1.0", + "phpunit/phpunit": "3.7.*", + "psr/log": "~1.0", + "symfony/class-loader": "~2.1", + "zendframework/zend-cache": "2.*,<2.3", + "zendframework/zend-log": "2.*,<2.3" + }, + "suggest": { + "guzzlehttp/guzzle": "Guzzle 5 has moved to a new package name. The package you have installed, Guzzle 3, is deprecated." + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.9-dev" + } + }, + "autoload": { + "psr-0": { + "Guzzle": "src/", + "Guzzle\\Tests": "tests/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Michael Dowling", + "email": "mtdowling@gmail.com", + "homepage": "https://github.com/mtdowling" + }, + { + "name": "Guzzle Community", + "homepage": "https://github.com/guzzle/guzzle/contributors" + } + ], + "description": "PHP HTTP client. This library is deprecated in favor of https://packagist.org/packages/guzzlehttp/guzzle", + "homepage": "http://guzzlephp.org/", + "keywords": [ + "client", + "curl", + "framework", + "http", + "http client", + "rest", + "web service" + ], + "time": "2015-03-18 18:23:50" + }, + { "name": "guzzlehttp/guzzle", "version": "6.1.0", "source": { @@ -1230,6 +1380,52 @@ "time": "2015-09-19 14:15:08" }, { + "name": "pimple/pimple", + "version": "v3.0.2", + "source": { + "type": "git", + "url": "https://github.com/silexphp/Pimple.git", + "reference": "a30f7d6e57565a2e1a316e1baf2a483f788b258a" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/silexphp/Pimple/zipball/a30f7d6e57565a2e1a316e1baf2a483f788b258a", + "reference": "a30f7d6e57565a2e1a316e1baf2a483f788b258a", + "shasum": "" + }, + "require": { + "php": ">=5.3.0" + }, + "type": "library", + "extra": { + "branch-alias": { + "dev-master": "3.0.x-dev" + } + }, + "autoload": { + "psr-0": { + "Pimple": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "license": [ + "MIT" + ], + "authors": [ + { + "name": "Fabien Potencier", + "email": "fabien@symfony.com" + } + ], + "description": "Pimple, a simple Dependency Injection Container", + "homepage": "http://pimple.sensiolabs.org", + "keywords": [ + "container", + "dependency injection" + ], + "time": "2015-09-11 15:10:35" + }, + { "name": "psr/http-message", "version": "1.0", "source": { @@ -1389,6 +1585,44 @@ "time": "2015-07-16 15:26:57" }, { + "name": "shift31/laravel-elasticsearch", + "version": "1.3.4", + "source": { + "type": "git", + "url": "https://github.com/shift31/laravel-elasticsearch.git", + "reference": "5341998fc1d87a21ee44e44482d07de297b1b515" + }, + "dist": { + "type": "zip", + "url": "https://api.github.com/repos/shift31/laravel-elasticsearch/zipball/5341998fc1d87a21ee44e44482d07de297b1b515", + "reference": "5341998fc1d87a21ee44e44482d07de297b1b515", + "shasum": "" + }, + "require": { + "elasticsearch/elasticsearch": "~1.3", + "illuminate/support": "~4|~5", + "php": ">=5.3.0" + }, + "type": "library", + "autoload": { + "classmap": [ + "src/migrations" + ], + "psr-0": { + "Shift31\\LaravelElasticsearch": "src/" + } + }, + "notification-url": "https://packagist.org/downloads/", + "authors": [ + { + "name": "Shift 31 Consulting", + "email": "code@shift31.com" + } + ], + "description": "A Laravel Service Provider for the Elasticsearch API client", + "time": "2015-08-10 02:30:03" + }, + { "name": "swiftmailer/swiftmailer", "version": "v5.4.1", "source": { @@ -2350,7 +2584,7 @@ }, "dist": { "type": "zip", - "url": "https://api.github.com/repos/padraic/mockery/zipball/bb19cd92e91aee51c344d8bd1a453dc85de47c21", + "url": "https://api.github.com/repos/padraic/mockery/zipball/15f6bb72f66df6857b957bedd09dc4fd0ace3b03", "reference": "bb19cd92e91aee51c344d8bd1a453dc85de47c21", "shasum": "" }, diff -r 037687868bc4 -r de47e8f66e8b server/src/config/app.php --- a/server/src/config/app.php Thu Dec 10 16:05:53 2015 +0100 +++ b/server/src/config/app.php Tue Dec 15 10:40:00 2015 +0100 @@ -141,6 +141,7 @@ * Additional service providers */ 'Collective\Html\HtmlServiceProvider', + 'Shift31\LaravelElasticsearch\ElasticsearchServiceProvider', /* * Application Service Providers... diff -r 037687868bc4 -r de47e8f66e8b server/src/config/elasticsearch.php --- /dev/null Thu Jan 01 00:00:00 1970 +0000 +++ b/server/src/config/elasticsearch.php Tue Dec 15 10:40:00 2015 +0100 @@ -0,0 +1,11 @@ + array( + env('ELASTICSEARCH_URL') + ), + 'logPath' => storage_path() . env('ELASTICSEARCH_LOG_PATH'), + 'logLevel' => Logger::INFO +);