diff -r a76bae4795d5 -r 48f5380c26d0 server/src/app/Console/Commands/ImportCocoonRDF.php --- a/server/src/app/Console/Commands/ImportCocoonRDF.php Thu Apr 06 11:29:14 2017 +0200 +++ b/server/src/app/Console/Commands/ImportCocoonRDF.php Fri Jun 09 15:22:02 2017 +0200 @@ -9,6 +9,7 @@ use Symfony\Component\Console\Input\InputArgument; use Phpoaipmh\Client; use Phpoaipmh\Endpoint; +use CorpusParole\Libraries\Sparql\GuzzleSparqlClient; class ImportCocoonRDF extends Command { @@ -218,8 +219,9 @@ $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE')); $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE')); - $this->gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url')); - $this->gs_raw = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw')); + $this->httpClient = app()->make('Guzzle'); + $this->gs = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url')); + $this->gs_raw = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw')); $this->documentCount = [ 'all' => 0, @@ -268,17 +270,23 @@ while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) { $loadRetry++; try { - $doc = new \EasyRdf\Graph($docRdfUrl); - $doc->load(); + $resp = $this->httpClient->get($docRdfUrl); + $content_type = $resp->getHeader('Content-Type'); + $format = null; + if(is_array($content_type) && count($content_type)>0) { + list($format, ) = \EasyRdf\Utils::parseMimeType($content_type[0]); + } + $doc = new \EasyRdf\Graph($docRdfUrl, $resp->getBody(), $format); $docLoaded = true; } //TODO: catch network exception - add error to database - catch(\Exception $e) { - $code = $e->getCode(); - $message = $e->getMessage(); - $this->info("\nError processing $identifier. code : $code, message: $message"); - Log::debug("Error processing $identifier. code : $code, message: $message"); - if($code == 400 || ($code == 0 && stripos($message, 'timed out')>=0) ) { + catch(\GuzzleHttp\Exception\ConnectException $e) { + $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); + Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); + continue; + } + catch(\GuzzleHttp\Exception\ClientException $e) { + if($e->getResponse()->getStatusCode() == 400) { $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); continue; @@ -288,7 +296,11 @@ Log::error("Error processing $identifier ($docRdfUrl) : $e"); break; } - //$this->error(print_r($e->getTraceAsString(),true)); + } + catch(\Exception $e) { + $this->error("\nError processing $identifier ($docRdfUrl) : $e"); + Log::error("Error processing $identifier ($docRdfUrl) : $e"); + break; } } if(!$docLoaded) {