server/src/app/Console/Commands/ImportCocoonRDF.php
author ymh <ymh.work@gmail.com>
Tue, 17 Nov 2015 13:11:55 +0100
changeset 18 f2a40bbc27f6
parent 4 f55970e41793
child 19 eadaf0b8f02e
permissions -rw-r--r--
add rdf mapper + merger + basic database model
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     1
<?php
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     3
namespace CorpusParole\Console\Commands;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     4
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     5
use Config;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
     6
use Log;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
use Illuminate\Console\Command;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
use Symfony\Component\Console\Input\InputOption;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
use Symfony\Component\Console\Input\InputArgument;
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    10
use Phpoaipmh\Client;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    11
use Phpoaipmh\Endpoint;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    13
/**
 * Artisan command importing the RDF documents published by Cocoon into the
 * SPARQL store configured under the "corpusparole.*" configuration keys.
 *
 * The list of records is obtained through OAI-PMH; for each record whose
 * graph is not already present in the store, the RDF document is downloaded
 * and inserted.
 */
class ImportCocoonRDF extends Command {

    /**
     * Threshold used to decide whether an insert timeout is tolerated.
     * The counter of insert timeouts is compared with "<=" against this
     * value before being incremented, so INSERT_TIMEOUT_RETRY + 1 timeouts
     * are tolerated over a whole run before the exception is rethrown.
     */
    const INSERT_TIMEOUT_RETRY = 5;

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Import Rdf from Cocoon.';

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip}';

    /**
     * Create a new command instance.
     */
    public function __construct() {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Iterates over the OAI-PMH records of the "LanguesDeFrance" set, skips
     * the first --skip records, and for every remaining record whose graph
     * does not yet contain any triple in the store, loads the RDF document
     * and inserts it.
     *
     * @return void
     *
     * @throws \Exception when an insert fails for a reason other than a
     *                    tolerated timeout.
     */
    public function fire() {

        libxml_use_internal_errors(true);

        $skip = (int)$this->option('skip');

        $this->comment("Skipping $skip records");

        $gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url'));

        $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'));
        $endpoint = new Endpoint($client);

        $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');

        // These values do not change between records: read them once.
        $rdfBaseUri = Config::get('corpusparole.cocoon_rdf_base_uri');
        $docIdBaseUri = Config::get('corpusparole.cocoon_doc_id_base_uri');
        $docIdBaseLength = strlen(Config::get('corpusparole.cocoon_doc_id_base'));
        $maxLoadRetry = Config::get('corpusparole.max_load_retry', 3);

        //TODO : treat timeout exceptions
        $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');

        $insertTimeouts = 0;

        foreach ($recs as $item) {

            $identifierNodes = $item->xpath('/record/header/identifier');
            if (empty($identifierNodes)) {
                // Without an identifier no document URL can be built.
                $this->error("\nRecord without identifier, skipping");
                Log::error("Record without identifier, skipping");
                $progressBar->setMessage("no identifier - Skipping");
                $progressBar->advance();
                continue;
            }

            $identifier = (string) $identifierNodes[0];
            $localId = substr($identifier, $docIdBaseLength);
            $docRdfUrl = $rdfBaseUri.$localId;
            $message = "$identifier : $docRdfUrl";

            if ($recs->getNumRetrieved() <= $skip) {
                $progressBar->setMessage("$message - Skipping");
                $progressBar->advance();
                continue;
            }
            $progressBar->setMessage($message);
            $progressBar->advance();

            $docUri = $docIdBaseUri.$localId;

            // Only import documents whose graph holds no triple yet.
            $resDocs = $gs->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
            if ($resDocs->getBoolean()) {
                continue;
            }

            $doc = $this->loadDocument($identifier, $docRdfUrl, $maxLoadRetry);
            if ($doc === null) {
                continue;
            }

            //TODO: treat errors
            $subjects = $doc->resources();
            if (empty($subjects)) {
                // reset() on an empty array returns false: nothing to insert.
                $this->error("\nError processing $identifier ($docRdfUrl) : empty document");
                Log::error("Error processing $identifier ($docRdfUrl) : empty document");
                continue;
            }
            // The URI of the first resource is used as the target graph URI.
            $subject = reset($subjects)->getUri();

            try {
                $gs->insert($doc, $subject);
            }
            catch(\Exception $e) {
                // just log not much we can do here...
                $this->error("\nError on insert $identifier ($docRdfUrl) : $e");
                Log::error("Error on insert $identifier ($docRdfUrl) : $e");

                $isTimeout = $e instanceof \EasyRdf\Exception
                    && $this->isTimeoutMessage($e->getMessage());

                if ($isTimeout && $insertTimeouts <= self::INSERT_TIMEOUT_RETRY) {
                    $this->info("\nThis is a timeout, we continue.");
                    Log::info("This is a timeout, we continue.");
                    $insertTimeouts++;
                    continue;
                }
                throw $e;
            }
        }
        $progressBar->setMessage("finished");
        $progressBar->finish();
    }

    /**
     * Download the RDF document at $docRdfUrl, retrying on timeouts.
     *
     * A failure with exception code 1 and a message containing "timed out"
     * is retried, up to $maxRetry attempts in total. Any other failure is
     * logged and aborts the loading of this document.
     *
     * @param string $identifier record identifier, used in log messages only
     * @param string $docRdfUrl  URL of the RDF document to load
     * @param int    $maxRetry   maximum number of load attempts
     *
     * @return \EasyRdf\Graph|null the loaded graph, or null when loading failed
     */
    private function loadDocument($identifier, $docRdfUrl, $maxRetry) {
        $attempt = 0;
        while ($attempt < $maxRetry) {
            $attempt++;
            try {
                $doc = new \EasyRdf\Graph($docRdfUrl);
                $doc->load();
                return $doc;
            }
            //TODO: catch network exception - add error to database
            catch(\Exception $e) {
                $code = $e->getCode();
                $errorMessage = $e->getMessage();
                $this->line("\nError processing $identifier. code : $code, message: $errorMessage");
                Log::debug("Error processing $identifier. code : $code, message: $errorMessage");

                if ($code == 1 && $this->isTimeoutMessage($errorMessage)) {
                    $this->comment("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
                    Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
                    continue;
                }

                $this->error("\nError processing $identifier ($docRdfUrl) : $e");
                Log::error("Error processing $identifier ($docRdfUrl) : $e");
                return null;
            }
        }
        // Every attempt timed out.
        return null;
    }

    /**
     * Tell whether an exception message denotes a timeout.
     *
     * stripos() returns false when the needle is absent and may return 0
     * when it is found at the start, hence the strict comparison.
     *
     * @param string $message exception message to inspect
     *
     * @return bool true when the message contains "timed out" (case-insensitive)
     */
    private function isTimeoutMessage($message) {
        return stripos($message, 'timed out') !== false;
    }
}