7 use Illuminate\Console\Command; |
7 use Illuminate\Console\Command; |
8 use Symfony\Component\Console\Input\InputOption; |
8 use Symfony\Component\Console\Input\InputOption; |
9 use Symfony\Component\Console\Input\InputArgument; |
9 use Symfony\Component\Console\Input\InputArgument; |
10 use Phpoaipmh\Client; |
10 use Phpoaipmh\Client; |
11 use Phpoaipmh\Endpoint; |
11 use Phpoaipmh\Endpoint; |
|
12 use CorpusParole\Libraries\Sparql\GuzzleSparqlClient; |
12 |
13 |
13 class ImportCocoonRDF extends Command { |
14 class ImportCocoonRDF extends Command { |
14 |
15 |
15 const INSERT_TIMEOUT_RETRY = 5; |
16 const INSERT_TIMEOUT_RETRY = 5; |
16 |
17 |
216 $this->comment("Clear raw repository: ".($rawClear?'TRUE':'FALSE')); |
217 $this->comment("Clear raw repository: ".($rawClear?'TRUE':'FALSE')); |
217 $this->comment("Clear repository: ".($clear?'TRUE':'FALSE')); |
218 $this->comment("Clear repository: ".($clear?'TRUE':'FALSE')); |
218 $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE')); |
219 $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE')); |
219 $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE')); |
220 $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE')); |
220 |
221 |
221 $this->gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url')); |
222 $this->httpClient = app()->make('Guzzle'); |
222 $this->gs_raw = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw')); |
223 $this->gs = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url')); |
|
224 $this->gs_raw = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw')); |
223 |
225 |
224 $this->documentCount = [ |
226 $this->documentCount = [ |
225 'all' => 0, |
227 'all' => 0, |
226 'unknown' => 0, |
228 'unknown' => 0, |
227 'error' => 0, |
229 'error' => 0, |
266 $loadRetry = 0; |
268 $loadRetry = 0; |
267 $doc = null; |
269 $doc = null; |
268 while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) { |
270 while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) { |
269 $loadRetry++; |
271 $loadRetry++; |
270 try { |
272 try { |
271 $doc = new \EasyRdf\Graph($docRdfUrl); |
273 $resp = $this->httpClient->get($docRdfUrl); |
272 $doc->load(); |
274 $content_type = $resp->getHeader('Content-Type'); |
|
275 $format = null; |
|
276 if(is_array($content_type) && count($content_type)>0) { |
|
277 list($format, ) = \EasyRdf\Utils::parseMimeType($content_type[0]); |
|
278 } |
|
279 $doc = new \EasyRdf\Graph($docRdfUrl, $resp->getBody(), $format); |
273 $docLoaded = true; |
280 $docLoaded = true; |
274 } |
281 } |
275 //TODO: catch network exception - add error to database |
282 //TODO: catch network exception - add error to database |
276 catch(\Exception $e) { |
283 catch(\GuzzleHttp\Exception\ConnectException $e) { |
277 $code = $e->getCode(); |
284 $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
278 $message = $e->getMessage(); |
285 Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
279 $this->info("\nError processing $identifier. code : $code, message: $message"); |
286 continue; |
280 Log::debug("Error processing $identifier. code : $code, message: $message"); |
287 } |
281 if($code == 400 || ($code == 0 && stripos($message, 'timed out')>=0) ) { |
288 catch(\GuzzleHttp\Exception\ClientException $e) { |
|
289 if($e->getResponse()->getStatusCode() == 400) { |
282 $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
290 $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
283 Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
291 Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
284 continue; |
292 continue; |
285 } |
293 } |
286 else { |
294 else { |
287 $this->error("\nError processing $identifier ($docRdfUrl) : $e"); |
295 $this->error("\nError processing $identifier ($docRdfUrl) : $e"); |
288 Log::error("Error processing $identifier ($docRdfUrl) : $e"); |
296 Log::error("Error processing $identifier ($docRdfUrl) : $e"); |
289 break; |
297 break; |
290 } |
298 } |
291 //$this->error(print_r($e->getTraceAsString(),true)); |
299 } |
|
300 catch(\Exception $e) { |
|
301 $this->error("\nError processing $identifier ($docRdfUrl) : $e"); |
|
302 Log::error("Error processing $identifier ($docRdfUrl) : $e"); |
|
303 break; |
292 } |
304 } |
293 } |
305 } |
294 if(!$docLoaded) { |
306 if(!$docLoaded) { |
295 $this->documentCount['error'] += 1; |
307 $this->documentCount['error'] += 1; |
296 continue; |
308 continue; |