server/src/app/Console/Commands/ImportCocoonRDF.php
author ymh <ymh.work@gmail.com>
Mon, 19 Mar 2018 16:04:43 +0100
changeset 571 6f852d0f7760
parent 544 ad58d7627f70
permissions -rw-r--r--
Small readme correction
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     1
<?php
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     3
namespace CorpusParole\Console\Commands;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     4
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     5
use Config;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
     6
use Log;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
use Illuminate\Console\Command;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
use Symfony\Component\Console\Input\InputOption;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
use Symfony\Component\Console\Input\InputArgument;
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    10
use Phpoaipmh\Client;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    11
use Phpoaipmh\Endpoint;
544
ad58d7627f70 use same http client in ImportCocoonRDF and define version 0.22
ymh <ymh.work@gmail.com>
parents: 531
diff changeset
    12
use Phpoaipmh\HttpAdapter\GuzzleAdapter;
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
    13
use CorpusParole\Libraries\Sparql\GuzzleSparqlClient;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    15
class ImportCocoonRDF extends Command {
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    16
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    17
    const INSERT_TIMEOUT_RETRY = 5;
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    18
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    19
    const MAPPER_CLASS_MAP = [
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    20
        "http://purl.org/dc/dcmitype/Sound" => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper',
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    21
        "http://purl.org/dc/dcmitype/MovingImage" => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper',
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    22
        "http://purl.org/dc/dcmitype/Text" => '\CorpusParole\Libraries\Mappers\CocoonTextRdfMapper',
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    23
        "http://purl.org/dc/dcmitype/Collection" => '\CorpusParole\Libraries\Mappers\CocoonCollectionRdfMapper'
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    24
    ];
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    25
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    26
    /**
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    27
     * The console command description.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    28
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    29
     * @var string
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    30
     */
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    31
    protected $description = 'Import Rdf from Cocoon.';
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    32
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    33
    /**
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    34
    * The name and signature of the console command.
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    35
    *
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    36
    * @var string
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    37
    */
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    38
    protected $signature = "corpus-parole:importRDF
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    39
        {--skip=0 : Number of record to skip}
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    40
        {--no-raw : Do not record raw queries}
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    41
        {--no-raw-clear : Do not clear raw repository}
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    42
        {--clear : Clear repository}
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    43
        {--force-import : Overwrite document from import event if the repo version is more recent}
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    44
        {--keep-repo-doc : Keep the existing doc in repo (default is replace document)}
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    45
    ";
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    46
    //protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip}';
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    48
    /**
     * Build the command instance.
     *
     * Nothing is initialised here beyond what the parent constructor does.
     */
    public function __construct()
    {
        parent::__construct();
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    55
    /**
     * Collect the DCMI type URIs declared for a document.
     *
     * Reads the resource-valued "dc11:type" properties of $docUri in $doc and
     * keeps those whose URI starts with the DCMI type namespace. When none is
     * found and the graph holds at least one resource typed "edm:Collection",
     * the DCMI Collection type is returned instead.
     *
     * @param mixed  $doc    graph exposing resource() and allOfType()
     * @param string $docUri URI of the document resource inside $doc
     * @return array list of DCMI type URIs, possibly empty
     */
    private function getDocTypes($doc, $docUri)
    {
        $dcmiNamespace = 'http://purl.org/dc/dcmitype/';
        $declaredTypes = [];

        $documentResource = $doc->resource($docUri);
        foreach ($documentResource->all("dc11:type","resource") as $typeResource) {
            $typeUri = $typeResource->getUri();
            if (strpos($typeUri, $dcmiNamespace) !== 0) {
                // not a DCMI type: ignore it
                continue;
            }
            $declaredTypes[] = $typeUri;
        }

        if (!empty($declaredTypes)) {
            return $declaredTypes;
        }

        // no explicit DCMI type: the document may still be a collection
        $collections = $doc->allOfType('edm:Collection');
        if (empty($collections)) {
            return $declaredTypes;
        }

        $declaredTypes[] = "http://purl.org/dc/dcmitype/Collection";
        return $declaredTypes;
    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    78
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    79
    /**
     * Map one source document onto the output graphs of its mapper.
     *
     * The document type is the first entry returned by getDocTypes(). The
     * mapper class is looked up in MAPPER_CLASS_MAP, instantiated with
     * ($doc, $docUri) and asked to map the graph.
     *
     * Side effects: increments $this->documentCount under 'unknown' (no
     * mapper for the type), 'error' (mapGraph() threw), or 'all' plus the
     * type URI (success).
     *
     * @param mixed  $doc    source graph, handed to getDocTypes() and to the mapper
     * @param string $docUri URI of the document resource inside $doc
     * @return array two-element array: ['unknown', null], ['error', null] or
     *               [type URI, result of the mapper's getOutputGraphes()]
     */
    public function mapDoc($doc, $docUri) {
        $inputDocTypes = $this->getDocTypes($doc, $docUri);

        $docType = count($inputDocTypes) > 0 ? $inputDocTypes[0] : null;

        if (is_null($docType) || !array_key_exists($docType, self::MAPPER_CLASS_MAP)) {
            // The messages identify the document by $docUri: it is the only
            // document identifier available inside this method.
            $this->error("\nError processing $docUri : $docType unknown mapper");
            Log::error("Error processing $docUri : $docType unknown mapper");
            $this->documentCount['unknown'] += 1;
            return ['unknown', null];
        }

        $mapperClass = self::MAPPER_CLASS_MAP[$docType];
        $mapper = new $mapperClass($doc, $docUri);

        try {
            $mapper->mapGraph();
        } catch (\Exception $e) {
            Log::error("Error processing $docUri : error mapping graph : $e");
            $this->documentCount['error'] += 1;
            return ['error', null];
        }

        $this->documentCount['all'] += 1;
        // per-type counters are created on first use
        $this->documentCount[$docType] = isset($this->documentCount[$docType]) ? $this->documentCount[$docType] + 1 : 1;

        return [$docType, $mapper->getOutputGraphes()];
    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   110
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   111
    /**
     * Merge mapped graphs into the repository reached through $this->gs.
     *
     * For each mapped graph the current content of the named graph with the
     * same URI is fetched with a CONSTRUCT query:
     *  - if the repository graph is empty, the mapped graph is inserted as is;
     *  - otherwise a merger is chosen from the document type (Collection,
     *    Text, anything else is handled by the Sound merger), the merged
     *    graph is computed and, unless it is isomorphic to what is already
     *    stored, the named graph is cleared and the merged graph inserted.
     *
     * A failure of the CONSTRUCT query is reported and terminates the
     * process with exit status 1. Insert failures recognised as EasyRdf
     * timeouts are logged and skipped while the number of timeouts already
     * seen by this instance is at most INSERT_TIMEOUT_RETRY; any other
     * exception is rethrown.
     *
     * @param string|null $docType       DCMI type URI of the source document
     * @param array|null  $outputGraphes mapped graphs, as returned by mapDoc()
     * @return void
     * @throws \Exception when clearing or inserting a graph fails for a reason
     *                    other than a tolerated timeout
     */
    public function mergeDocs($docType, $outputGraphes) {

        if (empty($outputGraphes)) {
            // mapDoc() returns null graphs for unknown or failed documents
            return;
        }

        $collectionType = "http://purl.org/dc/dcmitype/Collection";

        foreach ($outputGraphes as $mappedGraph) {

            $mappedGraphUri = $mappedGraph->getUri();
            try {
                $resDocs = $this->gs->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$mappedGraphUri> { ?s ?p ?o }}");
            } catch (\Exception $e) {
                $this->error("\nError on graph query $mappedGraphUri : $e \n" . $e->getMessage() . "\n");
                Log::error("\nError on graph query $mappedGraphUri : $e \n" . $e->getMessage());
                // terminate with a failure status: a bare exit would report success
                exit(1);
            }

            // the named graph only has to be cleared when it already holds triples
            $doDelete = !$resDocs->isEmpty();

            if (!$doDelete) {
                $mergedGraph = $mappedGraph;
            } else {
                $mappedTypes = $this->getDocTypes($mappedGraph, $mappedGraphUri);

                if ($docType === $collectionType || in_array($collectionType, $mappedTypes, true)) {
                    $merger = new \CorpusParole\Libraries\Mergers\CocoonCollectionRdfMerger();
                    $baseGraph = $resDocs;
                    $sourceGraph = $mappedGraph;
                }
                elseif ($docType === "http://purl.org/dc/dcmitype/Text") {
                    $merger = new \CorpusParole\Libraries\Mergers\CocoonTextRdfMerger();
                    $baseGraph = $resDocs;
                    $sourceGraph = $mappedGraph;
                }
                else {
                    // for every other type the mapped graph is the base and
                    // the repository content is merged into it
                    $merger = new \CorpusParole\Libraries\Mergers\CocoonSoundRdfMerger();
                    $baseGraph = $mappedGraph;
                    $sourceGraph = $resDocs;
                }
                $mergedGraph = $merger->mergeGraph($baseGraph, $sourceGraph, $mappedGraphUri);
                if (\EasyRdf\Isomorphic::isomorphic($resDocs, $mergedGraph)) {
                    // graphs are isomorphic, no need to go further for this graph
                    Log::info("Graph are isomorphic for $mappedGraphUri, skipping");
                    continue;
                }
            }

            try {
                if ($doDelete) {
                    $this->gs->clear($mappedGraphUri);
                }
                $this->gs->insert($mergedGraph, $mappedGraphUri);
            }
            catch (\Exception $e) {
                // just log, not much we can do here...
                $this->error("\nError on insert $mappedGraphUri : $e");
                Log::error("Error on insert $mappedGraphUri : $e");

                // The timeout counter lives on the instance so the limit
                // applies across successive mergeDocs() calls of one run.
                if (!isset($this->insertTimeouts)) {
                    $this->insertTimeouts = 0;
                }

                // The class name must be fully qualified: a relative name
                // would be resolved inside the current namespace. stripos()
                // returns false when the needle is absent, hence !== false.
                $isTimeout = $e instanceof \EasyRdf\Exception
                    && stripos($e->getMessage(), 'timed out') !== false;

                if ($isTimeout && $this->insertTimeouts <= self::INSERT_TIMEOUT_RETRY) {
                    $this->info("\nThis is a timeout, we continue.");
                    Log::info("This is a timeout, we continue.");
                    $this->insertTimeouts++;
                    continue;
                }
                throw $e;
            }
        }
    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   180
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   181
    /**
     * Return the modification date carried by a graph.
     *
     * The date is read from the dcterms:modified literal of the first
     * resource typed edm:ProvidedCHO. A date at Unix timestamp 0 is returned
     * when the graph has no such resource, when the resource has no
     * dcterms:modified literal, or when the literal value cannot be parsed
     * with the W3C date format, so callers always receive a \DateTime.
     *
     * @param mixed $graph graph exposing allOfType()
     * @return \DateTime
     */
    function getModified($graph) {
        // fallback value: "never modified"
        $epoch = new \DateTime();
        $epoch->setTimestamp(0);

        // get first element of array (reset() yields false on an empty array)
        $providedCHORes = $graph->allOfType('http://www.europeana.eu/schemas/edm/ProvidedCHO');
        $providedCHO = reset($providedCHORes);
        if ($providedCHO === false) {
            return $epoch;
        }

        $modified = $providedCHO->getLiteral("<http://purl.org/dc/terms/modified>");
        if (is_null($modified)) {
            return $epoch;
        }

        $value = $modified->getValue();
        if ($value instanceof \DateTime) {
            // NOTE(review): typed date literals may already yield a DateTime
            // from getValue() - confirm against the EasyRdf literal classes.
            return $value;
        }

        // createFromFormat() returns false on failure; never leak that to callers
        $parsed = is_string($value) ? \DateTime::createFromFormat(\DateTime::W3C, $value) : false;
        if ($parsed === false) {
            return $epoch;
        }
        return $parsed;
    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   198
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   199
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   200
    /**
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   201
     * Execute the console command.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   202
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   203
     * @return mixed
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   204
     */
526
cdaf9dfb5dfd correct licence problem in bug #0026523
ymh <ymh.work@gmail.com>
parents: 518
diff changeset
   205
    public function handle() {
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   206
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   207
        libxml_use_internal_errors(true);
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   208
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   209
        $skip = (int)$this->option('skip');
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   210
        $raw = !$this->option('no-raw');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   211
        $rawClear = !$this->option('no-raw-clear');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   212
        $clear = $this->option('clear');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   213
        $forceImport = $this->option('force-import');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   214
        $keepRepoDoc = $this->option('keep-repo-doc');
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   215
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   216
        $this->comment("Skipping $skip records");
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   217
        $this->comment("Querying Cocoon: ".($raw?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   218
        $this->comment("Clear raw repository: ".($rawClear?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   219
        $this->comment("Clear repository: ".($clear?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   220
        $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   221
        $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE'));
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   222
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   223
        $this->httpClient = app()->make('Guzzle');
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   224
        $this->gs = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url'));
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   225
        $this->gs_raw = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw'));
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   226
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   227
        $this->documentCount = [
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   228
            'all' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   229
            'unknown' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   230
            'error' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   231
            'raw_duplicates' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   232
            'modified' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   233
            'replaced' => 0
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   234
        ];
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   235
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   236
        if($raw) {
544
ad58d7627f70 use same http client in ImportCocoonRDF and define version 0.22
ymh <ymh.work@gmail.com>
parents: 531
diff changeset
   237
            $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'), new GuzzleAdapter($this->httpClient));
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   238
            $endpoint = new Endpoint($client);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   239
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   240
            $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');
506
8a5bb4b48b85 try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
ymh <ymh.work@gmail.com>
parents: 412
diff changeset
   241
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   242
            $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   243
            $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   244
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   245
            $insertTimeouts = 0;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   246
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   247
            //Clear raw repository if asked
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   248
            if($rawClear) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   249
                $this->gs_raw->clear("all");
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   250
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   251
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   252
            foreach ($recs as $item) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   253
                $item->registerXPathNamespace('oai', "http://www.openarchives.org/OAI/2.0/");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   254
                $identifier = (string) $item->xpath('oai:header/oai:identifier')[0];
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   255
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   256
                $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   257
                $message = "$identifier : $docRdfUrl";
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   258
                if($recs->getNumRetrieved() <= $skip) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   259
                    $progressBar->setMessage("$message - Skipping");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   260
                    $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   261
                    continue;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   262
                }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   263
                $progressBar->setMessage($message);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   264
                $progressBar->advance();
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   265
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   266
                $docUri = config('corpusparole.cocoon_doc_id_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   267
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   268
                $docLoaded = false;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   269
                $loadRetry = 0;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   270
                $doc = null;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   271
                while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   272
                    $loadRetry++;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   273
                    try {
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   274
                        $resp = $this->httpClient->get($docRdfUrl);
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   275
                        $content_type = $resp->getHeader('Content-Type');
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   276
                        $format = null;
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   277
                        if(is_array($content_type) && count($content_type)>0) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   278
                            list($format, ) = \EasyRdf\Utils::parseMimeType($content_type[0]);
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   279
                        }
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   280
                        $doc = new \EasyRdf\Graph($docRdfUrl, $resp->getBody(), $format);
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   281
                        $docLoaded = true;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   282
                    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   283
                    //TODO: catch network exception - add error to database
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   284
                    catch(\GuzzleHttp\Exception\ConnectException $e) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   285
                        $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   286
                        Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   287
                        continue;
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   288
                    }
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   289
                    catch(\GuzzleHttp\Exception\ClientException $e) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   290
                        if($e->getResponse()->getStatusCode() == 400) {
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   291
                            $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   292
                            Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   293
                            continue;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   294
                        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   295
                        else {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   296
                            $this->error("\nError processing $identifier ($docRdfUrl) : $e");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   297
                            Log::error("Error processing $identifier ($docRdfUrl) : $e");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   298
                            break;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   299
                        }
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   300
                    }
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   301
                    catch(\Exception $e) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   302
                        $this->error("\nError processing $identifier ($docRdfUrl) : $e");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   303
                        Log::error("Error processing $identifier ($docRdfUrl) : $e");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   304
                        break;
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   305
                    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   306
                }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   307
                if(!$docLoaded) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   308
                    $this->documentCount['error'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   309
                    continue;
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   310
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   311
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   312
                $resDocsRaw = $this->gs_raw->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   313
                if($resDocsRaw->getBoolean()) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   314
                    $this->gs_raw->clear($docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   315
                    $this->documentCount['raw_duplicates'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   316
                }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   317
                $this->gs_raw->insert($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   318
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   319
            $progressBar->setMessage("finished raw import");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   320
            $progressBar->finish();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   321
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   322
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   323
        if($clear) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   324
            $this->gs->clear("all");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   325
        }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   326
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   327
        $collectionDocsUris = $this->gs_raw->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   328
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   329
                ?s <http://purl.org/dc/elements/1.1/type> <http://purl.org/dc/dcmitype/Collection>.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   330
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   331
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   332
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   333
        $collectionCount = count($collectionDocsUris);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   334
        $this->info("\nImporting $collectionCount Collections from raw repository");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   335
        $progressBar = $this->output->createProgressBar($collectionCount);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   336
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   337
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   338
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   339
        foreach($collectionDocsUris as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   340
            $docUri = $docUriRes->uri->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   341
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   342
            $progressBar->setMessage("Importing collection $docUri.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   343
            $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   344
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   345
            $doc = $this->gs_raw->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o. }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   346
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   347
            //map the doc
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   348
            list($docType, $mappedGraphes) = $this->mapDoc($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   349
518
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   350
            if($docType === 'unknown' || $docType === 'error') {
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   351
                // The error has been traced in mapDoc
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   352
                continue;
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   353
            }
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   354
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   355
            //merge the result docs
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   356
            $this->mergeDocs($docType, $mappedGraphes);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   357
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   358
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   359
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   360
        $progressBar->setMessage("finished raw import for collections.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   361
        $progressBar->finish();
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   362
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   363
        // list the existing documents
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   364
        $providedCHODocsUris = [];
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   365
        $providedCHODocsUrisRes = $this->gs->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   366
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   367
                ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   368
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   369
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   370
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   371
        foreach($providedCHODocsUrisRes as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   372
            array_push($providedCHODocsUris, $docUriRes->uri->getUri());
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   373
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   374
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   375
        $this->info("\n\nWe have ".count($providedCHODocsUris)." providedCHO in database.\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   376
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   377
        $soundDocsUris = $this->gs_raw->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   378
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   379
                ?s <http://purl.org/dc/elements/1.1/type> ?o.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   380
                FILTER(?o IN (<http://purl.org/dc/dcmitype/Sound>, <http://purl.org/dc/dcmitype/MovingImage>))
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   381
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   382
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   383
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   384
        $soundCount = count($soundDocsUris);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   385
        $this->info("\nImporting $soundCount Sound (or Moving Image) from raw repository\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   386
        $progressBar = $this->output->createProgressBar($soundCount);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   387
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   388
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   389
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   390
        foreach($soundDocsUris as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   391
            $docUri = $docUriRes->uri->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   392
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   393
            $progressBar->setMessage("Importing Sound (or Moving Image) $docUri.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   394
            $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   395
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   396
            $doc = $this->gs_raw->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o. }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   397
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   398
            //map the doc
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   399
            list($docType, $mappedGraphes) = $this->mapDoc($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   400
            $firstGraph = reset($mappedGraphes); // first graph is main graph
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   401
            // remove it from the list of existing graphs in the repository
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   402
            $firstGraphUri = $firstGraph->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   403
            if(($key = array_search($firstGraphUri, $providedCHODocsUris)) !== false) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   404
               unset($providedCHODocsUris[$key]);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   405
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   406
            //if asked, delete it from repository. check modified date
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   407
            //merge the result docs
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   408
            try {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   409
                $resDocs = $this->gs->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$firstGraphUri> { ?s ?p ?o }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   410
            } catch (\Exception $e) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   411
                $this->error("\nError on graph query $firstGraphUri : $e \n" . $e->getMessage() . "\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   412
                Log::error("\nError on graph query $firstGraphUri : $e \n" . $e->getMessage());
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   413
                exit;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   414
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   415
            $doDelete = true;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   416
            if($resDocs->isEmpty()) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   417
                $doDelete = false;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   418
            } else {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   419
                // get modified from repo
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   420
                $dateRepo = $this->getModified($resDocs);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   421
                // get modified from import
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   422
                $dateImport = $this->getModified($firstGraph);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   423
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   424
                if($dateRepo > $dateImport) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   425
                    $this->documentCount['modified'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   426
                    $doDelete = $forceImport;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   427
                } else {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   428
                    $doDelete = !$keepRepoDoc;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   429
                }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   430
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   431
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   432
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   433
            if($doDelete) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   434
                $this->documentCount['replaced'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   435
                $this->gs->clear($firstGraphUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   436
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   437
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   438
            $this->mergeDocs($docType, $mappedGraphes);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   439
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   440
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   441
        $progressBar->setMessage("finished raw import for sounds.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   442
        $progressBar->finish();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   443
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   444
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   445
        $textDocsUris = $this->gs_raw->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   446
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   447
                ?s <http://purl.org/dc/elements/1.1/type> <http://purl.org/dc/dcmitype/Text>.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   448
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   449
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   450
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   451
        $textCount = count($textDocsUris);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   452
        $this->info("\n\nImporting $textCount text from raw repository\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   453
        $progressBar = $this->output->createProgressBar($textCount);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   454
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   455
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   456
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   457
        foreach($textDocsUris as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   458
            $docUri = $docUriRes->uri->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   459
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   460
            $progressBar->setMessage("Importing Text $docUri.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   461
            $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   462
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   463
            $doc = $this->gs_raw->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o. }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   464
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   465
            //map the doc
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   466
            list($docType, $mappedGraphes) = $this->mapDoc($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   467
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   468
            //merge the result docs
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   469
            $this->mergeDocs($docType, $mappedGraphes);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   470
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   471
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   472
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   473
        $progressBar->setMessage("finished raw import for text.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   474
        $progressBar->finish();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   475
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   476
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   477
        // delete left overs from previous repository
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   478
        $this->info("\n\nThere is ".count($providedCHODocsUris)." documents left-over.\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   479
        if(count($providedCHODocsUris) > 0 && $delete_old) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   480
            foreach($providedCHODocsUris as $graphUri) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   481
                $this->gs->clear($graphUri);
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   482
            }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   483
        }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   484
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   485
        $this->info("\n\nDocument count info: ");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   486
        foreach ($this->documentCount as $docType => $docCount) {
172
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   487
            if($docType == 'error' && $docCount > 0) {
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   488
                $this->error("$docType => $docCount");
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   489
            } else {
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   490
                $this->info("$docType => $docCount");
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   491
            }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   492
        }
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   493
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
   494
}