server/src/app/Console/Commands/ImportCocoonRDF.php
author nowmad@23.1.168.192.in-addr.arpa
Fri, 13 Nov 2015 11:01:27 +0100
changeset 13 ba32dd4cf8d3
parent 4 f55970e41793
child 18 f2a40bbc27f6
permissions -rw-r--r--
update styles
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     1
<?php
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     3
namespace CorpusParole\Console\Commands;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     4
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     5
use Config;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
     6
use Log;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
use Illuminate\Console\Command;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
use Symfony\Component\Console\Input\InputOption;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
use Symfony\Component\Console\Input\InputArgument;
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    10
use Phpoaipmh\Client;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    11
use Phpoaipmh\Endpoint;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    13
class ImportCocoonRDF extends Command {
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    14
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    15
    /**
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    16
     * The console command description.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    17
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    18
     * @var string
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    19
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    20
    protected $description = 'Import Rdf from Cocoon.';
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    21
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    22
    /**
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    23
    * The name and signature of the console command.
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    24
    *
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    25
    * @var string
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    26
    */
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    27
    protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip}';
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    29
    /**
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    30
     * Create a new command instance.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    31
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    32
    public function __construct() {
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    33
        parent::__construct();
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    34
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    36
    /**
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    37
     * Execute the console command.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    38
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    39
     * @return mixed
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    40
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    41
    public function fire() {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    42
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    43
        libxml_use_internal_errors(true);
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    44
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    45
        $skip = (int)$this->option('skip');
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    46
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    47
        $this->comment("Skipping $skip records");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    48
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    49
        $gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url'));
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    50
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    52
        $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'));
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    53
        $endpoint = new Endpoint($client);
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    54
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    55
        $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    56
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    57
        //TODO : treat timeout exceptions
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    58
        $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    59
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    60
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    61
        foreach ($recs as $item) {
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    62
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    63
            $identifier = (string) $item->xpath('/record/header/identifier')[0];
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    64
            $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    65
            $message = "$identifier : $docRdfUrl";
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    66
            if($recs->getNumRetrieved() <= $skip) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    67
                $progressBar->setMessage("$message - Skipping");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    68
                $progressBar->advance();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    69
                continue;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    70
            }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    71
            $progressBar->setMessage($message);
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    72
            $progressBar->advance();
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    74
            $docUri = config('corpusparole.cocoon_doc_id_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    75
            $resDocs = $gs->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    76
            if(!$resDocs->getBoolean()) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    77
                $docLoaded = false;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    78
                $loadRetry = 0;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    79
                while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    80
                    $loadRetry++;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    81
                    try {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    82
                        $doc = new \EasyRdf\Graph($docRdfUrl);
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    83
                        $doc->load();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    84
                        $docLoaded = true;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    85
                    }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    86
                    //TODO: catch network exception - add error to database
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    87
                    catch(\Exception $e) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    88
                        $code = $e->getCode();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    89
                        $message = $e->getMessage();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    90
                        $this->debug("\nError processing $identifier. code : $code, message: $message");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    91
                        Log::debug("Error processing $identifier. code : $code, message: $message");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    92
                        if($code == 1 && stripos($message, 'timed out')>=0 ) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    93
                            $this->warning("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    94
                            Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    95
                            continue;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    96
                        }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    97
                        else {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    98
                            $this->error("\nError processing $identifier ($docRdfUrl) : $e");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    99
                            Log::error("Error processing $identifier ($docRdfUrl) : $e");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   100
                            break;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   101
                        }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   102
                        //$this->error(print_r($e->getTraceAsString(),true));
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   103
                    }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   104
                }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   105
                if(!$docLoaded) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   106
                    continue;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   107
                }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   108
                //TODO: treat errors
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   109
                $subjects = $doc->resources();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   110
                $subject = reset($subjects)->getUri();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   111
                //TODO: exceptions ? but if pb on insert probably we have to fail anyway
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   112
                $gs->insert($doc, $subject);
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   113
            }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   114
        }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   115
        $progressBar->setMessage("finished");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   116
        $progressBar->finish();
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   117
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
   118
}