author | ymh <ymh.work@gmail.com> |
Fri, 27 Nov 2015 17:59:36 +0100 | |
changeset 19 | eadaf0b8f02e |
parent 18 | f2a40bbc27f6 |
child 114 | 8af5ed0521a2 |
permissions | -rw-r--r-- |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
1 |
<?php |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
2 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
3 |
namespace CorpusParole\Console\Commands; |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
4 |
|
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
5 |
use Config; |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
6 |
use Log; |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
7 |
use Illuminate\Console\Command; |
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
8 |
use Symfony\Component\Console\Input\InputOption; |
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
9 |
use Symfony\Component\Console\Input\InputArgument; |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
10 |
use Phpoaipmh\Client; |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
11 |
use Phpoaipmh\Endpoint; |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
12 |
|
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
13 |
class ImportCocoonRDF extends Command { |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
14 |
|
18
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
15 |
const INSERT_TIMEOUT_RETRY = 5; |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
16 |
|
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
17 |
const MAPPER_CLASS_MAP = [ |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
18 |
"http://purl.org/dc/dcmitype/Sound" => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper', |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
19 |
"http://purl.org/dc/dcmitype/MovingImage" => '\CorpusParole\Libraries\Mappers\CocoonSoundRdfMapper', |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
20 |
"http://purl.org/dc/dcmitype/Text" => '\CorpusParole\Libraries\Mappers\CocoonTextRdfMapper', |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
21 |
"http://purl.org/dc/dcmitype/Collection" => '\CorpusParole\Libraries\Mappers\CocoonCollectionRdfMapper' |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
22 |
]; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
23 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
24 |
/** |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
25 |
* The console command description. |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
26 |
* |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
27 |
* @var string |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
28 |
*/ |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
29 |
protected $description = 'Import Rdf from Cocoon.'; |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
30 |
|
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
31 |
/** |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
32 |
* The name and signature of the console command. |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
33 |
* |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
34 |
* @var string |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
35 |
*/ |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
36 |
protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip} {--raw : Register raw}'; |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
37 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
38 |
/**
 * Build the command instance.
 *
 * No command-specific set-up happens here: construction is handed
 * straight to the parent Command constructor.
 */
public function __construct()
{
    parent::__construct();
}
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
44 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
45 |
/**
 * List the DCMI type URIs attached to a document in an RDF graph.
 *
 * Reads every "dc11:type" value of kind "resource" on the resource
 * identified by $docUri and keeps the URIs that start with
 * 'http://purl.org/dc/dcmitype/'. When none is found, the graph is
 * checked for resources typed 'edm:Collection'; if at least one exists
 * the document is reported as a DCMI Collection.
 *
 * @param object $doc    graph object exposing resource() and allOfType()
 *                       (fire() passes \EasyRdf\Graph instances and SPARQL
 *                       CONSTRUCT results)
 * @param string $docUri URI of the resource whose types are inspected
 *
 * @return array list of DCMI type URIs, possibly empty
 */
private function getDocTypes($doc, $docUri) {

    $foundTypes = [];

    foreach ($doc->resource($docUri)->all("dc11:type", "resource") as $typeResource) {
        $typeUri = $typeResource->getUri();
        // Only types from the DCMI type vocabulary are of interest.
        if (strpos($typeUri, 'http://purl.org/dc/dcmitype/') !== 0) {
            continue;
        }
        $foundTypes[] = $typeUri;
    }

    if (!empty($foundTypes)) {
        return $foundTypes;
    }

    // No explicit DCMI type: fall back to detecting a collection through
    // the presence of an edm:Collection resource in the graph.
    $collections = $doc->allOfType('edm:Collection');
    if (!empty($collections)) {
        $foundTypes[] = "http://purl.org/dc/dcmitype/Collection";
    }

    return $foundTypes;
}
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
68 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
69 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
70 |
/** |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
71 |
* Execute the console command. |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
72 |
* |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
73 |
* @return mixed |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
74 |
*/ |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
75 |
public function fire() { |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
76 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
77 |
libxml_use_internal_errors(true); |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
78 |
|
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
79 |
$skip = (int)$this->option('skip'); |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
80 |
$raw = $this->option('raw'); |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
81 |
|
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
82 |
$this->comment("Skipping $skip records"); |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
83 |
$this->comment("Recording raw queries: $raw"); |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
84 |
|
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
85 |
$gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url')); |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
86 |
$gs_raw = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url_raw'), Config::get('corpusparole.sesame_update_url_raw')); |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
87 |
|
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
88 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
89 |
$client = new Client(Config::get('corpusparole.cocoon_oaipmh_url')); |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
90 |
$endpoint = new Endpoint($client); |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
91 |
|
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
92 |
$recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance'); |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
93 |
|
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
94 |
//TODO : treat timeout exceptions |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
95 |
$progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection()); |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
96 |
$progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%'); |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
97 |
|
18
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
98 |
$insertTimeouts = 0; |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
99 |
|
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
100 |
$documentCounts = ['all' => 0, 'unknown' => 0]; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
101 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
102 |
foreach ($recs as $item) { |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
103 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
104 |
$identifier = (string) $item->xpath('/record/header/identifier')[0]; |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
105 |
$docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base'))); |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
106 |
$message = "$identifier : $docRdfUrl"; |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
107 |
if($recs->getNumRetrieved() <= $skip) { |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
108 |
$progressBar->setMessage("$message - Skipping"); |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
109 |
$progressBar->advance(); |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
110 |
continue; |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
111 |
} |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
112 |
$progressBar->setMessage($message); |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
113 |
$progressBar->advance(); |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
114 |
|
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
115 |
$docUri = config('corpusparole.cocoon_doc_id_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base'))); |
18
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
116 |
|
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
117 |
$docLoaded = false; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
118 |
$loadRetry = 0; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
119 |
$doc = null; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
120 |
while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
121 |
$loadRetry++; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
122 |
try { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
123 |
$doc = new \EasyRdf\Graph($docRdfUrl); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
124 |
$doc->load(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
125 |
$docLoaded = true; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
126 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
127 |
//TODO: catch network exception - add error to database |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
128 |
catch(\Exception $e) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
129 |
$code = $e->getCode(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
130 |
$message = $e->getMessage(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
131 |
$this->info("\nError processing $identifier. code : $code, message: $message"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
132 |
Log::debug("Error processing $identifier. code : $code, message: $message"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
133 |
if($code == 0 && stripos($message, 'timed out')>=0 ) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
134 |
$this->info("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
135 |
Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
136 |
continue; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
137 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
138 |
else { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
139 |
$this->error("\nError processing $identifier ($docRdfUrl) : $e"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
140 |
Log::error("Error processing $identifier ($docRdfUrl) : $e"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
141 |
break; |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
142 |
} |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
143 |
//$this->error(print_r($e->getTraceAsString(),true)); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
144 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
145 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
146 |
if(!$docLoaded) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
147 |
continue; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
148 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
149 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
150 |
//insert raw |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
151 |
if($raw) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
152 |
$resDocsRaw = $gs_raw->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
153 |
if($resDocsRaw->getBoolean()) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
154 |
$gs_raw->clear($docUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
155 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
156 |
$gs_raw->insert($doc, $docUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
157 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
158 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
159 |
//map doc |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
160 |
$inputDocTypes = $this->getDocTypes($doc, $docUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
161 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
162 |
$docType = count($inputDocTypes)>0? $inputDocTypes[0]:null; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
163 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
164 |
if(is_null($docType) || !array_key_exists($docType,ImportCocoonRDF::MAPPER_CLASS_MAP)) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
165 |
$this->error("\nError processing $identifier ($docRdfUrl) : $docType unknown mapper"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
166 |
Log::error("Error processing $identifier ($docRdfUrl) : $docType unknown mapper"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
167 |
$documentCounts['unknown'] += 1; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
168 |
continue; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
169 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
170 |
$documentCounts['all'] += 1; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
171 |
$documentCounts[$docType] = isset($documentCounts[$docType])?$documentCounts[$docType]+1:1; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
172 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
173 |
$mapperClass = ImportCocoonRDF::MAPPER_CLASS_MAP[$docType]; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
174 |
$mapper = new $mapperClass($doc, $docUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
175 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
176 |
$mapper->mapGraph(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
177 |
$mappedGraphes = $mapper->getOutputGraphes(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
178 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
179 |
foreach ($mapper->getOutputGraphes() as $mappedGraphKey => $mappedGraph) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
180 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
181 |
$mappedGraphUri = $mappedGraph->getUri(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
182 |
try { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
183 |
$resDocs = $gs->query("CONSTRUCT { ?s ?p ?o } WHERE { GRAPH <$mappedGraphUri> { ?s ?p ?o }}"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
184 |
} catch (\Exception $e) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
185 |
$this->error("\nError on graph query $identifier ($mappedGraphUri) : $e \n" . $e->getBody() . "\n"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
186 |
Log::error("\nError on graph query $identifier ($mappedGraphUri) : $e \n" . $e->getBody()); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
187 |
exit; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
188 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
189 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
190 |
$mergedGraph = null; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
191 |
$doDelete = true; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
192 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
193 |
if($resDocs->isEmpty()) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
194 |
$mergedGraph = $mappedGraph; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
195 |
$doDelete = false; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
196 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
197 |
else { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
198 |
$doDelete = true; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
199 |
$mappedTypes = $this->getDocTypes($mappedGraph, $mappedGraphUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
200 |
$presentTypes = $this->getDocTypes($resDocs, $mappedGraphUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
201 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
202 |
if($docType == "http://purl.org/dc/dcmitype/Collection" || in_array("http://purl.org/dc/dcmitype/Collection", $mappedTypes)) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
203 |
$merger = new \CorpusParole\Libraries\Mergers\CocoonCollectionRdfMerger(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
204 |
$baseGraph = $resDocs; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
205 |
$sourceGraph = $mappedGraph; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
206 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
207 |
elseif ($docType == "http://purl.org/dc/dcmitype/Text") { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
208 |
$merger = new \CorpusParole\Libraries\Mergers\CocoonSoundRdfMerger(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
209 |
$baseGraph = $resDocs; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
210 |
$sourceGraph = $mappedGraph; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
211 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
212 |
else { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
213 |
$merger = new \CorpusParole\Libraries\Mergers\CocoonSoundRdfMerger(); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
214 |
$baseGraph = $mappedGraph; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
215 |
$sourceGraph = $resDocs; |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
216 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
217 |
$mergedGraph = $merger->mergeGraph($baseGraph, $sourceGraph, $mappedGraphUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
218 |
if(\EasyRdf\Isomorphic::isomorphic($resDocs, $mergedGraph)) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
219 |
// graphs are isomorphic: no need to go any further for this graph |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
220 |
Log::info("Graph are isomorphic for $mappedGraphUri (from $identifier : $docRdfUrl), skipping"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
221 |
continue; |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
222 |
} |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
223 |
} |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
224 |
|
18
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
225 |
try { |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
226 |
if($doDelete) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
227 |
$gs->clear($mappedGraphUri); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
228 |
} |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
229 |
$gs->insert($mergedGraph, $mappedGraphUri); |
18
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
230 |
} |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
231 |
catch(\Exception $e) { |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
232 |
// just log not much we can do here... |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
233 |
$this->error("\nError on insert $identifier ($docRdfUrl) : $e"); |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
234 |
Log::error("Error on insert $identifier ($docRdfUrl) : $e"); |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
235 |
$code = $e->getCode(); |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
236 |
$message = $e->getMessage(); |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
237 |
if($e instanceof EasyRdf\Exception && stripos($message, 'timed out')>=0 && $insertTimeout<= ImportCocoonRDF::INSERT_TIMEOUT_RETRY) { |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
238 |
$this->info("\nThis is a timeout, we continue."); |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
239 |
Log::info("This is a timeout, we continue."); |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
240 |
$insertTimeouts++; |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
241 |
continue; |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
242 |
} |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
243 |
throw $e; |
f2a40bbc27f6
add rdf mapper + merger + basic database model
ymh <ymh.work@gmail.com>
parents:
4
diff
changeset
|
244 |
} |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
245 |
} |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
246 |
} |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
247 |
$progressBar->setMessage("finished"); |
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
248 |
$progressBar->finish(); |
19
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
249 |
|
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
250 |
$this->info("\nDocument count info: "); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
251 |
foreach ($documentCounts as $docType => $docCount) { |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
252 |
$this->info("$docType => $docCount"); |
eadaf0b8f02e
Bo conception step. back to ember page
ymh <ymh.work@gmail.com>
parents:
18
diff
changeset
|
253 |
} |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
254 |
} |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
255 |
} |