server/src/app/Console/Commands/ImportCocoonRDF.php
changeset 531 48f5380c26d0
parent 526 cdaf9dfb5dfd
child 544 ad58d7627f70
equal deleted inserted replaced
530:a76bae4795d5 531:48f5380c26d0
     7 use Illuminate\Console\Command;
     7 use Illuminate\Console\Command;
     8 use Symfony\Component\Console\Input\InputOption;
     8 use Symfony\Component\Console\Input\InputOption;
     9 use Symfony\Component\Console\Input\InputArgument;
     9 use Symfony\Component\Console\Input\InputArgument;
    10 use Phpoaipmh\Client;
    10 use Phpoaipmh\Client;
    11 use Phpoaipmh\Endpoint;
    11 use Phpoaipmh\Endpoint;
       
    12 use CorpusParole\Libraries\Sparql\GuzzleSparqlClient;
    12 
    13 
    13 class ImportCocoonRDF extends Command {
    14 class ImportCocoonRDF extends Command {
    14 
    15 
    15     const INSERT_TIMEOUT_RETRY = 5;
    16     const INSERT_TIMEOUT_RETRY = 5;
    16 
    17 
   216         $this->comment("Clear raw repository: ".($rawClear?'TRUE':'FALSE'));
   217         $this->comment("Clear raw repository: ".($rawClear?'TRUE':'FALSE'));
   217         $this->comment("Clear repository: ".($clear?'TRUE':'FALSE'));
   218         $this->comment("Clear repository: ".($clear?'TRUE':'FALSE'));
   218         $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE'));
   219         $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE'));
   219         $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE'));
   220         $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE'));
   220 
   221 
   221         $this->gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url'));
   222         $this->httpClient = app()->make('Guzzle');
   222         $this->gs_raw = new \EasyRdf\Sparql\Client(Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw'));
   223         $this->gs = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url'));
       
   224         $this->gs_raw = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw'));
   223 
   225 
   224         $this->documentCount = [
   226         $this->documentCount = [
   225             'all' => 0,
   227             'all' => 0,
   226             'unknown' => 0,
   228             'unknown' => 0,
   227             'error' => 0,
   229             'error' => 0,
   266                 $loadRetry = 0;
   268                 $loadRetry = 0;
   267                 $doc = null;
   269                 $doc = null;
   268                 while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
   270                 while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
   269                     $loadRetry++;
   271                     $loadRetry++;
   270                     try {
   272                     try {
   271                         $doc = new \EasyRdf\Graph($docRdfUrl);
   273                         $resp = $this->httpClient->get($docRdfUrl);
   272                         $doc->load();
   274                         $content_type = $resp->getHeader('Content-Type');
       
   275                         $format = null;
       
   276                         if(is_array($content_type) && count($content_type)>0) {
       
   277                             list($format, ) = \EasyRdf\Utils::parseMimeType($content_type[0]);
       
   278                         }
       
   279                         $doc = new \EasyRdf\Graph($docRdfUrl, $resp->getBody(), $format);
   273                         $docLoaded = true;
   280                         $docLoaded = true;
   274                     }
   281                     }
   275                     //TODO: catch network exception - add error to database
   282                     //TODO: catch network exception - add error to database
   276                     catch(\Exception $e) {
   283                     catch(\GuzzleHttp\Exception\ConnectException $e) {
   277                         $code = $e->getCode();
   284                         $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
   278                         $message = $e->getMessage();
   285                         Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
   279                         $this->info("\nError processing $identifier. code : $code, message: $message");
   286                         continue;
   280                         Log::debug("Error processing $identifier. code : $code, message: $message");
   287                     }
   281                         if($code == 400 || ($code == 0 && stripos($message, 'timed out')>=0) ) {
   288                     catch(\GuzzleHttp\Exception\ClientException $e) {
       
   289                         if($e->getResponse()->getStatusCode() == 400) {
   282                             $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
   290                             $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
   283                             Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
   291                             Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
   284                             continue;
   292                             continue;
   285                         }
   293                         }
   286                         else {
   294                         else {
   287                             $this->error("\nError processing $identifier ($docRdfUrl) : $e");
   295                             $this->error("\nError processing $identifier ($docRdfUrl) : $e");
   288                             Log::error("Error processing $identifier ($docRdfUrl) : $e");
   296                             Log::error("Error processing $identifier ($docRdfUrl) : $e");
   289                             break;
   297                             break;
   290                         }
   298                         }
   291                         //$this->error(print_r($e->getTraceAsString(),true));
   299                     }
       
   300                     catch(\Exception $e) {
       
   301                         $this->error("\nError processing $identifier ($docRdfUrl) : $e");
       
   302                         Log::error("Error processing $identifier ($docRdfUrl) : $e");
       
   303                         break;
   292                     }
   304                     }
   293                 }
   305                 }
   294                 if(!$docLoaded) {
   306                 if(!$docLoaded) {
   295                     $this->documentCount['error'] += 1;
   307                     $this->documentCount['error'] += 1;
   296                     continue;
   308                     continue;