server/src/app/Console/Commands/ImportCocoonRDF.php
author ymh <ymh.work@gmail.com>
Fri, 09 Jun 2017 15:22:02 +0200
changeset 531 48f5380c26d0
parent 526 cdaf9dfb5dfd
child 544 ad58d7627f70
permissions -rw-r--r--
Replace EasyRdf http loading with guzzle to solve proxy problems
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     1
<?php
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     3
namespace CorpusParole\Console\Commands;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     4
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
     5
use Config;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
     6
use Log;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
use Illuminate\Console\Command;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
use Symfony\Component\Console\Input\InputOption;
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
use Symfony\Component\Console\Input\InputArgument;
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    10
use Phpoaipmh\Client;
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    11
use Phpoaipmh\Endpoint;
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
    12
use CorpusParole\Libraries\Sparql\GuzzleSparqlClient;
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
    14
class ImportCocoonRDF extends Command {
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    15
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
    16
    /**
     * Upper bound compared against the insert-timeout counter in mergeDocs().
     */
    const INSERT_TIMEOUT_RETRY = 5;

    /**
     * Mapper class used for each supported DCMI type URI.
     *
     * Sound and MovingImage documents are handled by the same mapper.
     */
    const MAPPER_CLASS_MAP = [
        'http://purl.org/dc/dcmitype/Sound'       => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper',
        'http://purl.org/dc/dcmitype/MovingImage' => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper',
        'http://purl.org/dc/dcmitype/Text'        => '\CorpusParole\Libraries\Mappers\CocoonTextRdfMapper',
        'http://purl.org/dc/dcmitype/Collection'  => '\CorpusParole\Libraries\Mappers\CocoonCollectionRdfMapper',
    ];
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    24
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    25
    /**
     * Human readable description of the console command.
     *
     * @var string
     */
    protected $description = "Import Rdf from Cocoon.";

    /**
     * Command name followed by the definition of each supported option.
     *
     * @var string
     */
    protected $signature = 'corpus-parole:importRDF
        {--skip=0 : Number of record to skip}
        {--no-raw : Do not record raw queries}
        {--no-raw-clear : Do not clear raw repository}
        {--clear : Clear repository}
        {--force-import : Overwrite document from import event if the repo version is more recent}
        {--keep-repo-doc : Keep the existing doc in repo (default is replace document)}
    ';
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    47
    /**
     * Build the command instance.
     *
     * Nothing is initialised here: construction is delegated entirely to the
     * parent Command constructor.
     */
    public function __construct()
    {
        parent::__construct();
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
    54
    /**
     * List the DCMI type URIs attached to a document.
     *
     * Every dc11:type resource of $docUri whose URI starts with the DCMI type
     * prefix is collected. When none is found and the graph holds at least one
     * edm:Collection resource, the DCMI Collection type is reported instead.
     *
     * @param mixed  $doc    graph holding the document (must provide resource() and allOfType())
     * @param string $docUri URI of the document resource inside $doc
     *
     * @return array list of DCMI type URIs, possibly empty
     */
    private function getDocTypes($doc, $docUri) {
        $dcmiTypes = [];

        foreach ($doc->resource($docUri)->all("dc11:type", "resource") as $typeResource) {
            $typeUri = $typeResource->getUri();
            if (strpos($typeUri, 'http://purl.org/dc/dcmitype/') !== 0) {
                // not a DCMI type, ignore it
                continue;
            }
            $dcmiTypes[] = $typeUri;
        }

        if (!empty($dcmiTypes)) {
            return $dcmiTypes;
        }

        // no explicit DCMI type: fall back on the presence of an edm:Collection
        $collections = $doc->allOfType('edm:Collection');
        if (empty($collections)) {
            return $dcmiTypes;
        }

        return ["http://purl.org/dc/dcmitype/Collection"];
    }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
    77
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
    78
    /**
     * Map a source document into its output graphs.
     *
     * The first DCMI type reported by getDocTypes() selects the mapper class
     * in MAPPER_CLASS_MAP. The 'unknown', 'error', 'all' and per-type entries
     * of $this->documentCount are updated according to the outcome.
     *
     * @param mixed  $doc    graph of the document (handed to getDocTypes() and to the mapper)
     * @param string $docUri URI of the document inside $doc
     *
     * @return array two-element array:
     *               - [document type URI, output graphs of the mapper] on success,
     *               - ['unknown', null] when no mapper is registered for the document type,
     *               - ['error', null] when the mapper throws while mapping the graph.
     */
    public function mapDoc($doc, $docUri) {
        $inputDocTypes = $this->getDocTypes($doc, $docUri);

        $docType = count($inputDocTypes) > 0 ? $inputDocTypes[0] : null;

        if (is_null($docType) || !array_key_exists($docType, self::MAPPER_CLASS_MAP)) {
            // $docUri is the only document identifier available in this scope.
            $this->error("\nError processing $docUri : $docType unknown mapper");
            Log::error("Error processing $docUri : $docType unknown mapper");
            $this->documentCount['unknown'] += 1;
            return ['unknown', null];
        }

        $mapperClass = self::MAPPER_CLASS_MAP[$docType];
        $mapper = new $mapperClass($doc, $docUri);

        try {
            $mapper->mapGraph();
        } catch (\Exception $e) {
            Log::error("Error processing $docUri : error mapping graph : $e");
            $this->documentCount['error'] += 1;
            return ['error', null];
        }

        $this->documentCount['all'] += 1;
        $this->documentCount[$docType] = isset($this->documentCount[$docType])
            ? $this->documentCount[$docType] + 1
            : 1;

        return [$docType, $mapper->getOutputGraphes()];
    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   109
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   110
    /**
     * Merge mapped graphs with the graphs already stored in the repository
     * and write the result back.
     *
     * For each mapped graph the current content of the named graph is fetched
     * through $this->gs. When the repository has nothing for that graph the
     * mapped graph is inserted as is. Otherwise a merger is chosen from the
     * document type, the graphs are merged, and the stored graph is cleared
     * and replaced, unless the merge result is isomorphic to what is stored.
     *
     * A failing query terminates the process with a non-zero exit status.
     * An insert failure that is an EasyRdf timeout is tolerated while the
     * instance-wide timeout counter does not exceed INSERT_TIMEOUT_RETRY;
     * any other insert failure is rethrown.
     *
     * @param string     $docType       DCMI type URI of the document the graphs come from
     * @param array|null $outputGraphes mapped graphs to merge; null or empty is a no-op
     *
     * @return void
     *
     * @throws \Exception when an insert fails for a reason other than a tolerated timeout
     */
    public function mergeDocs($docType, $outputGraphes) {

        // mapDoc() hands back null graphs for unknown or failing documents.
        if (empty($outputGraphes)) {
            return;
        }

        // The timeout budget lives on the instance so that it spans every call of a run.
        if (!isset($this->insertTimeouts)) {
            $this->insertTimeouts = 0;
        }

        $collectionType = "http://purl.org/dc/dcmitype/Collection";

        foreach ($outputGraphes as $mappedGraph) {

            $mappedGraphUri = $mappedGraph->getUri();
            try {
                $resDocs = $this->gs->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$mappedGraphUri> { ?s ?p ?o }}");
            } catch (\Exception $e) {
                $this->error("\nError on graph query $mappedGraphUri : $e \n" . $e->getMessage() . "\n");
                Log::error("\nError on graph query $mappedGraphUri : $e \n" . $e->getMessage());
                // Still fatal, but report the failure through the exit status.
                exit(1);
            }

            // An existing graph has to be cleared before the merged one is inserted.
            $doDelete = !$resDocs->isEmpty();

            if (!$doDelete) {
                $mergedGraph = $mappedGraph;
            } else {
                $mappedTypes = $this->getDocTypes($mappedGraph, $mappedGraphUri);

                if ($docType === $collectionType || in_array($collectionType, $mappedTypes, true)) {
                    $merger = new \CorpusParole\Libraries\Mergers\CocoonCollectionRdfMerger();
                    $baseGraph = $resDocs;
                    $sourceGraph = $mappedGraph;
                } elseif ($docType === "http://purl.org/dc/dcmitype/Text") {
                    $merger = new \CorpusParole\Libraries\Mergers\CocoonTextRdfMerger();
                    $baseGraph = $resDocs;
                    $sourceGraph = $mappedGraph;
                } else {
                    // For every other type the mapped graph is the base and the stored one the source.
                    $merger = new \CorpusParole\Libraries\Mergers\CocoonSoundRdfMerger();
                    $baseGraph = $mappedGraph;
                    $sourceGraph = $resDocs;
                }

                $mergedGraph = $merger->mergeGraph($baseGraph, $sourceGraph, $mappedGraphUri);
                if (\EasyRdf\Isomorphic::isomorphic($resDocs, $mergedGraph)) {
                    // Nothing changed for this graph, no need to rewrite it.
                    Log::info("Graph are isomorphic for $mappedGraphUri, skipping");
                    continue;
                }
            }

            try {
                if ($doDelete) {
                    $this->gs->clear($mappedGraphUri);
                }
                $this->gs->insert($mergedGraph, $mappedGraphUri);
            } catch (\Exception $e) {
                $this->error("\nError on insert $mappedGraphUri : $e");
                Log::error("Error on insert $mappedGraphUri : $e");

                // NOTE(review): $this->gs is a GuzzleSparqlClient; confirm that it
                // still reports timeouts as \EasyRdf\Exception.
                $isTimeout = $e instanceof \EasyRdf\Exception
                    && stripos($e->getMessage(), 'timed out') !== false;

                if ($isTimeout && $this->insertTimeouts <= self::INSERT_TIMEOUT_RETRY) {
                    $this->info("\nThis is a timeout, we continue.");
                    Log::info("This is a timeout, we continue.");
                    $this->insertTimeouts++;
                    continue;
                }
                throw $e;
            }
        }
    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   179
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   180
    /**
     * Get the modification date of the first edm:ProvidedCHO of a graph.
     *
     * The date is read from the dcterms:modified literal of that resource.
     * The Unix epoch is returned when the graph has no ProvidedCHO, when the
     * literal is missing, or when its value cannot be parsed as a W3C date,
     * so that the result is always a date object.
     *
     * @param mixed $graph graph to inspect (must provide allOfType())
     *
     * @return \DateTime modification date, or the Unix epoch when unavailable
     */
    public function getModified($graph) {
        $epoch = new \DateTime();
        $epoch->setTimestamp(0);

        // get first element of array
        $providedCHORes = $graph->allOfType('http://www.europeana.eu/schemas/edm/ProvidedCHO');
        $providedCHO = reset($providedCHORes);
        if ($providedCHO === false) {
            return $epoch;
        }

        $modified = $providedCHO->getLiteral("<http://purl.org/dc/terms/modified>");
        if (is_null($modified)) {
            return $epoch;
        }

        $value = $modified->getValue();
        if ($value instanceof \DateTime) {
            // Literal value is already a date object (presumably an xsd:dateTime literal — TODO confirm).
            return $value;
        }

        // createFromFormat() yields false on a parse failure: never leak it to callers.
        $date = \DateTime::createFromFormat(\DateTime::W3C, (string)$value);

        return ($date === false) ? $epoch : $date;
    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   197
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   198
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   199
    /**
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   200
     * Execute the console command.
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   201
     *
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   202
     * @return mixed
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   203
     */
526
cdaf9dfb5dfd correct licence problem in bug #0026523
ymh <ymh.work@gmail.com>
parents: 518
diff changeset
   204
    public function handle() {
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   205
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   206
        libxml_use_internal_errors(true);
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   207
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   208
        $skip = (int)$this->option('skip');
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   209
        $raw = !$this->option('no-raw');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   210
        $rawClear = !$this->option('no-raw-clear');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   211
        $clear = $this->option('clear');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   212
        $forceImport = $this->option('force-import');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   213
        $keepRepoDoc = $this->option('keep-repo-doc');
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   214
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   215
        $this->comment("Skipping $skip records");
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   216
        $this->comment("Querying Cocoon: ".($raw?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   217
        $this->comment("Clear raw repository: ".($rawClear?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   218
        $this->comment("Clear repository: ".($clear?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   219
        $this->comment("Keep existing document into repository: ".($keepRepoDoc?'TRUE':'FALSE'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   220
        $this->comment("Overwrite more recent document:".($forceImport?'TRUE':'FALSE'));
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   221
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   222
        $this->httpClient = app()->make('Guzzle');
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   223
        $this->gs = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url'), Config::get('corpusparole.rdf4j_update_url'));
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   224
        $this->gs_raw = new GuzzleSparqlClient($this->httpClient, Config::get('corpusparole.rdf4j_query_url_raw'), Config::get('corpusparole.rdf4j_update_url_raw'));
18
f2a40bbc27f6 add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents: 4
diff changeset
   225
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   226
        $this->documentCount = [
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   227
            'all' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   228
            'unknown' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   229
            'error' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   230
            'raw_duplicates' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   231
            'modified' => 0,
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   232
            'replaced' => 0
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   233
        ];
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   234
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   235
        if($raw) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   236
            $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   237
            $endpoint = new Endpoint($client);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   238
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   239
            $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');
506
8a5bb4b48b85 try to solve #0025932 + try to improve indexing process reliability by retrying bnf label resolve queries
ymh <ymh.work@gmail.com>
parents: 412
diff changeset
   240
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   241
            $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   242
            $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   243
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   244
            $insertTimeouts = 0;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   245
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   246
            //Clear raw repository if asked
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   247
            if($rawClear) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   248
                $this->gs_raw->clear("all");
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   249
            }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   250
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   251
            foreach ($recs as $item) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   252
                $item->registerXPathNamespace('oai', "http://www.openarchives.org/OAI/2.0/");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   253
                $identifier = (string) $item->xpath('oai:header/oai:identifier')[0];
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   254
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   255
                $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   256
                $message = "$identifier : $docRdfUrl";
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   257
                if($recs->getNumRetrieved() <= $skip) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   258
                    $progressBar->setMessage("$message - Skipping");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   259
                    $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   260
                    continue;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   261
                }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   262
                $progressBar->setMessage($message);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   263
                $progressBar->advance();
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   264
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   265
                $docUri = config('corpusparole.cocoon_doc_id_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base')));
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   266
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   267
                $docLoaded = false;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   268
                $loadRetry = 0;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   269
                $doc = null;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   270
                while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   271
                    $loadRetry++;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   272
                    try {
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   273
                        $resp = $this->httpClient->get($docRdfUrl);
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   274
                        $content_type = $resp->getHeader('Content-Type');
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   275
                        $format = null;
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   276
                        if(is_array($content_type) && count($content_type)>0) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   277
                            list($format, ) = \EasyRdf\Utils::parseMimeType($content_type[0]);
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   278
                        }
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   279
                        $doc = new \EasyRdf\Graph($docRdfUrl, $resp->getBody(), $format);
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   280
                        $docLoaded = true;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   281
                    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   282
                    //TODO: catch network exception - add error to database
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   283
                    catch(\GuzzleHttp\Exception\ConnectException $e) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   284
                        $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   285
                        Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   286
                        continue;
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   287
                    }
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   288
                    catch(\GuzzleHttp\Exception\ClientException $e) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   289
                        if($e->getResponse()->getStatusCode() == 400) {
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   290
                            $this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   291
                            Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   292
                            continue;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   293
                        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   294
                        else {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   295
                            $this->error("\nError processing $identifier ($docRdfUrl) : $e");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   296
                            Log::error("Error processing $identifier ($docRdfUrl) : $e");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   297
                            break;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   298
                        }
531
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   299
                    }
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   300
                    catch(\Exception $e) {
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   301
                        $this->error("\nError processing $identifier ($docRdfUrl) : $e");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   302
                        Log::error("Error processing $identifier ($docRdfUrl) : $e");
48f5380c26d0 Replace EasyRdf http loading with guzzle to solve proxy problems
ymh <ymh.work@gmail.com>
parents: 526
diff changeset
   303
                        break;
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   304
                    }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   305
                }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   306
                if(!$docLoaded) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   307
                    $this->documentCount['error'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   308
                    continue;
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   309
                }
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   310
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   311
                $resDocsRaw = $this->gs_raw->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   312
                if($resDocsRaw->getBoolean()) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   313
                    $this->gs_raw->clear($docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   314
                    $this->documentCount['raw_duplicates'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   315
                }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   316
                $this->gs_raw->insert($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   317
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   318
            $progressBar->setMessage("finished raw import");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   319
            $progressBar->finish();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   320
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   321
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   322
        if($clear) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   323
            $this->gs->clear("all");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   324
        }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   325
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   326
        $collectionDocsUris = $this->gs_raw->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   327
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   328
                ?s <http://purl.org/dc/elements/1.1/type> <http://purl.org/dc/dcmitype/Collection>.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   329
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   330
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   331
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   332
        $collectionCount = count($collectionDocsUris);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   333
        $this->info("\nImporting $collectionCount Collections from raw repository");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   334
        $progressBar = $this->output->createProgressBar($collectionCount);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   335
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   336
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   337
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   338
        foreach($collectionDocsUris as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   339
            $docUri = $docUriRes->uri->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   340
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   341
            $progressBar->setMessage("Importing collection $docUri.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   342
            $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   343
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   344
            $doc = $this->gs_raw->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o. }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   345
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   346
            //map the doc
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   347
            list($docType, $mappedGraphes) = $this->mapDoc($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   348
518
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   349
            if($docType === 'unknown' || $docType === 'error') {
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   350
                // The error has been traced in mapDoc
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   351
                continue;
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   352
            }
4864076bf0e3 Correct error cases after code reorganization
ymh <ymh.work@gmail.com>
parents: 513
diff changeset
   353
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   354
            //merge the result docs
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   355
            $this->mergeDocs($docType, $mappedGraphes);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   356
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   357
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   358
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   359
        $progressBar->setMessage("finished raw import for collections.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   360
        $progressBar->finish();
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   361
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   362
        // list the existing documents
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   363
        $providedCHODocsUris = [];
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   364
        $providedCHODocsUrisRes = $this->gs->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   365
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   366
                ?s <http://www.w3.org/1999/02/22-rdf-syntax-ns#type> <http://www.europeana.eu/schemas/edm/ProvidedCHO>.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   367
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   368
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   369
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   370
        foreach($providedCHODocsUrisRes as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   371
            array_push($providedCHODocsUris, $docUriRes->uri->getUri());
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   372
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   373
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   374
        $this->info("\n\nWe have ".count($providedCHODocsUris)." providedCHO in database.\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   375
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   376
        $soundDocsUris = $this->gs_raw->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   377
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   378
                ?s <http://purl.org/dc/elements/1.1/type> ?o.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   379
                FILTER(?o IN (<http://purl.org/dc/dcmitype/Sound>, <http://purl.org/dc/dcmitype/MovingImage>))
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   380
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   381
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   382
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   383
        $soundCount = count($soundDocsUris);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   384
        $this->info("\nImporting $soundCount Sound (or Moving Image) from raw repository\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   385
        $progressBar = $this->output->createProgressBar($soundCount);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   386
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   387
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   388
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   389
        foreach($soundDocsUris as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   390
            $docUri = $docUriRes->uri->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   391
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   392
            $progressBar->setMessage("Importing Sound (or Moving Image) $docUri.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   393
            $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   394
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   395
            $doc = $this->gs_raw->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o. }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   396
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   397
            //map the doc
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   398
            list($docType, $mappedGraphes) = $this->mapDoc($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   399
            $firstGraph = reset($mappedGraphes); // first graph is main graph
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   400
            // remove it from the list of existing graphs in the repository
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   401
            $firstGraphUri = $firstGraph->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   402
            if(($key = array_search($firstGraphUri, $providedCHODocsUris)) !== false) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   403
               unset($providedCHODocsUris[$key]);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   404
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   405
            //if asked, delete it from repository. check modified date
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   406
            //merge the result docs
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   407
            try {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   408
                $resDocs = $this->gs->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$firstGraphUri> { ?s ?p ?o }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   409
            } catch (\Exception $e) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   410
                $this->error("\nError on graph query $firstGraphUri : $e \n" . $e->getMessage() . "\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   411
                Log::error("\nError on graph query $firstGraphUri : $e \n" . $e->getMessage());
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   412
                exit;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   413
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   414
            $doDelete = true;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   415
            if($resDocs->isEmpty()) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   416
                $doDelete = false;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   417
            } else {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   418
                // get modified from repo
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   419
                $dateRepo = $this->getModified($resDocs);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   420
                // get modified from import
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   421
                $dateImport = $this->getModified($firstGraph);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   422
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   423
                if($dateRepo > $dateImport) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   424
                    $this->documentCount['modified'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   425
                    $doDelete = $forceImport;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   426
                } else {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   427
                    $doDelete = !$keepRepoDoc;
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   428
                }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   429
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   430
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   431
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   432
            if($doDelete) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   433
                $this->documentCount['replaced'] += 1;
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   434
                $this->gs->clear($firstGraphUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   435
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   436
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   437
            $this->mergeDocs($docType, $mappedGraphes);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   438
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   439
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   440
        $progressBar->setMessage("finished raw import for sounds.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   441
        $progressBar->finish();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   442
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   443
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   444
        $textDocsUris = $this->gs_raw->query("SELECT distinct ?uri WHERE {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   445
            GRAPH ?uri {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   446
                ?s <http://purl.org/dc/elements/1.1/type> <http://purl.org/dc/dcmitype/Text>.
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   447
            }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   448
        }");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   449
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   450
        $textCount = count($textDocsUris);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   451
        $this->info("\n\nImporting $textCount text from raw repository\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   452
        $progressBar = $this->output->createProgressBar($textCount);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   453
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   454
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   455
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   456
        foreach($textDocsUris as $docUriRes) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   457
            $docUri = $docUriRes->uri->getUri();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   458
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   459
            $progressBar->setMessage("Importing Text $docUri.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   460
            $progressBar->advance();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   461
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   462
            $doc = $this->gs_raw->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$docUri> { ?s ?p ?o. }}");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   463
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   464
            //map the doc
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   465
            list($docType, $mappedGraphes) = $this->mapDoc($doc, $docUri);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   466
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   467
            //merge the result docs
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   468
            $this->mergeDocs($docType, $mappedGraphes);
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   469
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   470
        }
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   471
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   472
        $progressBar->setMessage("finished raw import for text.");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   473
        $progressBar->finish();
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   474
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   475
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   476
        // delete left overs from previous repository
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   477
        $this->info("\n\nThere is ".count($providedCHODocsUris)." documents left-over.\n");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   478
        if(count($providedCHODocsUris) > 0 && $delete_old) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   479
            foreach($providedCHODocsUris as $graphUri) {
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   480
                $this->gs->clear($graphUri);
4
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   481
            }
f55970e41793 first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents: 3
diff changeset
   482
        }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   483
508
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   484
        $this->info("\n\nDocument count info: ");
2cb514f10a72 Improve RDF import
ymh <ymh.work@gmail.com>
parents: 506
diff changeset
   485
        foreach ($this->documentCount as $docType => $docCount) {
172
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   486
            if($docType == 'error' && $docCount > 0) {
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   487
                $this->error("$docType => $docCount");
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   488
            } else {
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   489
                $this->info("$docType => $docCount");
660570f13537 Add error counter to importRDF command + retry on code 400 for sparql download
ymh <ymh.work@gmail.com>
parents: 118
diff changeset
   490
            }
19
eadaf0b8f02e Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents: 18
diff changeset
   491
        }
2
00e2916104fe Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents: 1
diff changeset
   492
    }
1
01a844d292ac dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff changeset
   493
}