--- a/server/src/app/Console/Commands/ImportCocoonRDF.php Thu Apr 06 11:29:14 2017 +0200
+++ b/server/src/app/Console/Commands/ImportCocoonRDF.php Fri Jun 09 15:22:02 2017 +0200
@@ -9,6 +9,7 @@
use Symfony\Component\Console\Input\InputArgument;
use Phpoaipmh\Client;
use Phpoaipmh\Endpoint;
+use CorpusParole\Libraries\Sparql\GuzzleSparqlClient;
class ImportCocoonRDF extends Command {
@@ -218,8 +219,9 @@
$this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE'));
$this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE'));
- $this->gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url'));
- $this->gs_raw = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw'));
+ $this->httpClient = app()->make('Guzzle');
+ $this->gs = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url'));
+ $this->gs_raw = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw'));
$this->documentCount = [
'all' => 0,
@@ -268,17 +270,23 @@
while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
$loadRetry++;
try {
- $doc = new \EasyRdf\Graph($docRdfUrl);
- $doc->load();
+ $resp = $this->httpClient->get($docRdfUrl);
+ $content_type = $resp->getHeader('Content-Type');
+ $format = null;
+ if(is_array($content_type) && count($content_type)>0) {
+ list($format, ) = \EasyRdf\Utils::parseMimeType($content_type[0]);
+ }
+ $doc = new \EasyRdf\Graph($docRdfUrl, $resp->getBody(), $format);
$docLoaded = true;
}
//TODO: catch network exception - add error to database
- catch(\Exception $e) {
- $code = $e->getCode();
- $message = $e->getMessage();
- $this->info("\nError processing $identifier. code : $code, message: $message");
- Log::debug("Error processing $identifier. code : $code, message: $message");
- if($code == 400 || ($code == 0 && stripos($message, 'timed out')>=0) ) {
+ catch(\GuzzleHttp\Exception\ConnectException $e) {
+ $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
+ Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
+ continue;
+ }
+ catch(\GuzzleHttp\Exception\ClientException $e) {
+ if($e->getResponse()->getStatusCode() == 400) {
$this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
continue;
@@ -288,7 +296,11 @@
Log::error("Error processing $identifier ($docRdfUrl) : $e");
break;
}
- //$this->error(print_r($e->getTraceAsString(),true));
+ }
+ catch(\Exception $e) {
+ $this->error("\nError processing $identifier ($docRdfUrl) : $e");
+ Log::error("Error processing $identifier ($docRdfUrl) : $e");
+ break;
}
}
if(!$docLoaded) {