server/src/app/Console/Commands/ImportCocoonRDF.php
author ymh <ymh.work@gmail.com>
Fri, 27 Nov 2015 17:59:36 +0100
changeset 19 eadaf0b8f02e
parent 18 f2a40bbc27f6
child 114 8af5ed0521a2
permissions -rw-r--r--
Bo conception step. back to ember page
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     1
<?php
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     3
namespace CorpusParole\Console\Commands;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     4
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     5
use Config;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
     6
use Log;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
use Illuminate\Console\Command;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
use Symfony\Component\Console\Input\InputOption;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
use Symfony\Component\Console\Input\InputArgument;
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    10
use Phpoaipmh\Client;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    11
use Phpoaipmh\Endpoint;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    13
class ImportCocoonRDF extends Command {
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    14
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    15
    const INSERT_TIMEOUT_RETRY = 5;
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    16
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    17
    const MAPPER_CLASS_MAP = [
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    18
        "http://purl.org/dc/dcmitype/Sound" => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper',
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    19
        "http://purl.org/dc/dcmitype/MovingImage" => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper',
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    20
        "http://purl.org/dc/dcmitype/Text" => '\CorpusParole\Libraries\Mappers\CocoonTextRdfMapper',
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    21
        "http://purl.org/dc/dcmitype/Collection" => '\CorpusParole\Libraries\Mappers\CocoonCollectionRdfMapper'
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    22
    ];
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    23
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    24
    /**
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    25
     * The console command description.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    26
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    27
     * @var string
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    28
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    29
    protected $description = 'Import Rdf from Cocoon.';
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    30
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    31
    /**
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    32
    * The name and signature of the console command.
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    33
    *
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    34
    * @var string
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    35
    */
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    36
    protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip} {--raw : Register raw}';
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    38
    /**
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    39
     * Create a new command instance.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    40
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    41
    public function __construct() {
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    42
        parent::__construct();
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    43
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    45
    /**
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    46
     * Get the list of dcmi types for the graph
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    47
     */
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    48
    private function getDocTypes($doc, $docUri) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    49
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    50
        $res = $doc->resource($docUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    51
        $docTypes = [];
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    52
        //foreach ($res->all("http://purl.org/dc/elements/1.1/type") as $resType) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    53
        foreach ($res->all("dc11:type","resource") as $resType) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    54
            $type = $resType->getUri();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    55
            if(0 === strpos($type, 'http://purl.org/dc/dcmitype/')) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    56
                $docTypes[] = $type;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    57
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    58
        }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    59
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    60
        // if the doc type list is empty, check that we have a collection
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    61
        if(empty($docTypes)) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    62
            if(!empty($doc->allOfType('edm:Collection'))) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    63
                $docTypes[] = "http://purl.org/dc/dcmitype/Collection";
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    64
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    65
        }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    66
        return $docTypes;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    67
    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    68
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    69
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    70
    /**
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    71
     * Execute the console command.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    72
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    73
     * @return mixed
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    74
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    75
    public function fire() {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    76
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    77
        libxml_use_internal_errors(true);
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    78
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    79
        $skip = (int)$this->option('skip');
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    80
        $raw = $this->option('raw');
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    81
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    82
        $this->comment("Skipping $skip records");
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    83
        $this->comment("Recording raw queries: $raw");
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    84
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    85
        $gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url'));
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    86
        $gs_raw = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url_raw'), Config::get('corpusparole.sesame_update_url_raw'));
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    87
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    89
        $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'));
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    90
        $endpoint = new Endpoint($client);
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    91
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    92
        $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    93
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    94
        //TODO : treat timeout exceptions
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    95
        $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    96
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    97
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    98
        $insertTimeouts = 0;
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    99
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   100
        $documentCounts = ['all' => 0, 'unknown' => 0];
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   101
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   102
        foreach ($recs as $item) {
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   103
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   104
            $identifier = (string) $item->xpath('/record/header/identifier')[0];
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   105
            $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   106
            $message = "$identifier : $docRdfUrl";
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   107
            if($recs->getNumRetrieved() <= $skip) {
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   108
                $progressBar->setMessage("$message - Skipping");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   109
                $progressBar->advance();
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   110
                continue;
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   111
            }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   112
            $progressBar->setMessage($message);
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   113
            $progressBar->advance();
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   115
            $docUri = config('corpusparole.cocoon_doc_id_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   116
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   117
            $docLoaded = false;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   118
            $loadRetry = 0;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   119
            $doc = null;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   120
            while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   121
                $loadRetry++;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   122
                try {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   123
                    $doc = new \EasyRdf\Graph($docRdfUrl);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   124
                    $doc->load();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   125
                    $docLoaded = true;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   126
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   127
                //TODO: catch network exception - add error to database
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   128
                catch(\Exception $e) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   129
                    $code = $e->getCode();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   130
                    $message = $e->getMessage();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   131
                    $this->info("\nError processing $identifier. code : $code, message: $message");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   132
                    Log::debug("Error processing $identifier. code : $code, message: $message");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   133
                    if($code == 0 && stripos($message, 'timed out')>=0 ) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   134
                        $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   135
                        Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   136
                        continue;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   137
                    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   138
                    else {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   139
                        $this->error("\nError processing $identifier ($docRdfUrl) : $e");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   140
                        Log::error("Error processing $identifier ($docRdfUrl) : $e");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   141
                        break;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   142
                    }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   143
                    //$this->error(print_r($e->getTraceAsString(),true));
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   144
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   145
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   146
            if(!$docLoaded) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   147
                continue;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   148
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   149
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   150
            //insert raw
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   151
            if($raw) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   152
                $resDocsRaw = $gs_raw->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   153
                if($resDocsRaw->getBoolean()) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   154
                    $gs_raw->clear($docUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   155
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   156
                $gs_raw->insert($doc, $docUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   157
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   158
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   159
            //map doc
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   160
            $inputDocTypes = $this->getDocTypes($doc, $docUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   161
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   162
            $docType = count($inputDocTypes)>0? $inputDocTypes[0]:null;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   163
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   164
            if(is_null($docType) || !array_key_exists($docType,ImportCocoonRDF::MAPPER_CLASS_MAP)) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   165
                $this->error("\nError processing $identifier ($docRdfUrl) : $docType unknown mapper");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   166
                Log::error("Error processing $identifier ($docRdfUrl) : $docType unknown mapper");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   167
                $documentCounts['unknown'] += 1;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   168
                continue;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   169
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   170
            $documentCounts['all'] += 1;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   171
            $documentCounts[$docType] = isset($documentCounts[$docType])?$documentCounts[$docType]+1:1;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   172
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   173
            $mapperClass = ImportCocoonRDF::MAPPER_CLASS_MAP[$docType];
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   174
            $mapper = new $mapperClass($doc, $docUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   175
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   176
            $mapper->mapGraph();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   177
            $mappedGraphes = $mapper->getOutputGraphes();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   178
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   179
            foreach ($mapper->getOutputGraphes() as $mappedGraphKey => $mappedGraph) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   180
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   181
                $mappedGraphUri = $mappedGraph->getUri();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   182
                try {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   183
                    $resDocs = $gs->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$mappedGraphUri> { ?s ?p ?o }}");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   184
                } catch (\Exception $e) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   185
                    $this->error("\nError on graph query $identifier ($mappedGraphUri) : $e \n" . $e->getBody() . "\n");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   186
                    Log::error("\nError on graph query $identifier ($mappedGraphUri) : $e \n" . $e->getBody());
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   187
                    exit;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   188
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   189
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   190
                $mergedGraph = null;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   191
                $doDelete = true;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   192
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   193
                if($resDocs->isEmpty()) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   194
                    $mergedGraph = $mappedGraph;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   195
                    $doDelete = false;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   196
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   197
                else {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   198
                    $doDelete = true;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   199
                    $mappedTypes = $this->getDocTypes($mappedGraph, $mappedGraphUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   200
                    $presentTypes = $this->getDocTypes($resDocs, $mappedGraphUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   201
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   202
                    if($docType == "http://purl.org/dc/dcmitype/Collection" || in_array("http://purl.org/dc/dcmitype/Collection", $mappedTypes)) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   203
                        $merger = new \CorpusParole\Libraries\Mergers\CocoonCollectionRdfMerger();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   204
                        $baseGraph = $resDocs;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   205
                        $sourceGraph = $mappedGraph;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   206
                    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   207
                    elseif ($docType == "http://purl.org/dc/dcmitype/Text") {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   208
                        $merger = new \CorpusParole\Libraries\Mergers\CocoonSoundRdfMerger();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   209
                        $baseGraph = $resDocs;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   210
                        $sourceGraph = $mappedGraph;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   211
                    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   212
                    else {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   213
                        $merger = new \CorpusParole\Libraries\Mergers\CocoonSoundRdfMerger();
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   214
                        $baseGraph = $mappedGraph;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   215
                        $sourceGraph = $resDocs;
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   216
                    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   217
                    $mergedGraph = $merger->mergeGraph($baseGraph, $sourceGraph, $mappedGraphUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   218
                    if(\EasyRdf\Isomorphic::isomorphic($resDocs, $mergedGraph)) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   219
                        //graph are isomorphic no need to go farther for this graph
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   220
                        Log::info("Graph are isomorphic for $mappedGraphUri (from $identifier : $docRdfUrl), skipping");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   221
                        continue;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   222
                    }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   223
                }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   224
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   225
                try {
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   226
                    if($doDelete) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   227
                        $gs->clear($mappedGraphUri);
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   228
                    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   229
                    $gs->insert($mergedGraph, $mappedGraphUri);
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   230
                }
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   231
                catch(\Exception $e) {
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   232
                    // just log not much we can do here...
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   233
                    $this->error("\nError on insert $identifier ($docRdfUrl) : $e");
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   234
                    Log::error("Error on insert $identifier ($docRdfUrl) : $e");
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   235
                    $code = $e->getCode();
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   236
                    $message = $e->getMessage();
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   237
                    if($e instanceof EasyRdf\Exception && stripos($message, 'timed out')>=0 && $insertTimeout<= ImportCocoonRDF::INSERT_TIMEOUT_RETRY) {
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   238
                        $this->info("\nThis is a timeout, we continue.");
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   239
                        Log::info("This is a timeout, we continue.");
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   240
                        $insertTimeouts++;
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   241
                        continue;
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   242
                    }
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   243
                    throw $e;
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   244
                }
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   245
            }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   246
        }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   247
        $progressBar->setMessage("finished");
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   248
        $progressBar->finish();
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   249
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   250
        $this->info("\nDocument count info: ");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   251
        foreach ($documentCounts as $docType => $docCount) {
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   252
            $this->info("$docType => $docCount");
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   253
        }
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   254
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
   255
}