author | nowmad@23.1.168.192.in-addr.arpa |
Fri, 13 Nov 2015 11:01:27 +0100 | |
changeset 13 | ba32dd4cf8d3 |
parent 4 | f55970e41793 |
child 18 | f2a40bbc27f6 |
permissions | -rw-r--r-- |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
1 |
<?php |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
2 |
|
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
3 |
namespace CorpusParole\Console\Commands; |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
4 |
|
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
5 |
use Config; |
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
6 |
use Log; |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
7 |
use Illuminate\Console\Command; |
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
8 |
use Symfony\Component\Console\Input\InputOption; |
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
9 |
use Symfony\Component\Console\Input\InputArgument; |
2
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
10 |
use Phpoaipmh\Client; |
00e2916104fe
Migrate to php 5.6 + Laravel 5.1 + add phpunit test
ymh <ymh.work@gmail.com>
parents:
1
diff
changeset
|
11 |
use Phpoaipmh\Endpoint; |
1
01a844d292ac
dev environment + first skeleton for bo
ymh <ymh.work@gmail.com>
parents:
diff
changeset
|
12 |
|
4
f55970e41793
first skeleton of bo client in ember
ymh <ymh.work@gmail.com>
parents:
3
diff
changeset
|
13 |
/**
 * Artisan command that walks the Cocoon OAI-PMH record list and copies the RDF
 * document of each record into the SPARQL store configured under
 * `corpusparole.sesame_*`, skipping documents whose graph already exists.
 */
class ImportCocoonRDF extends Command {

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Import Rdf from Cocoon.';

    /**
     * The name and signature of the console command.
     *
     * @var string
     */
    protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip}';

    /**
     * Create a new command instance.
     */
    public function __construct() {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * For every record returned by the OAI-PMH endpoint:
     *  - derive the RDF download URL and the document graph URI from the
     *    record identifier (the configured id base prefix is stripped),
     *  - skip the record while the iterator's retrieved count is not above
     *    the `--skip` option,
     *  - skip the record when an ASK query reports that its graph already
     *    holds at least one triple,
     *  - otherwise load the RDF document (with retries on timeout) and insert
     *    it into the store.
     *
     * @return mixed
     */
    public function fire() {

        // Collect libxml errors internally instead of emitting PHP warnings.
        libxml_use_internal_errors(true);

        $skip = (int)$this->option('skip');

        $this->comment("Skipping $skip records");

        $gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url'));

        $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'));
        $endpoint = new Endpoint($client);

        $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');

        // Configuration values do not change while the command runs: read them once.
        $rdfBaseUri = Config::get('corpusparole.cocoon_rdf_base_uri');
        $docIdBaseUri = Config::get('corpusparole.cocoon_doc_id_base_uri');
        $docIdBaseLength = strlen(Config::get('corpusparole.cocoon_doc_id_base'));
        $maxLoadRetry = Config::get('corpusparole.max_load_retry', 3);

        //TODO : treat timeout exceptions
        $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');

        foreach ($recs as $item) {

            $identifier = (string) $item->xpath('/record/header/identifier')[0];
            // Part of the identifier that follows the configured id base prefix.
            $docSuffix = substr($identifier, $docIdBaseLength);
            $docRdfUrl = $rdfBaseUri.$docSuffix;
            $message = "$identifier : $docRdfUrl";

            // NOTE(review): getNumRetrieved() is presumably the number of records
            // yielded so far by the iterator - confirm against Phpoaipmh.
            if($recs->getNumRetrieved() <= $skip) {
                $progressBar->setMessage("$message - Skipping");
                $progressBar->advance();
                continue;
            }
            $progressBar->setMessage($message);
            $progressBar->advance();

            $docUri = $docIdBaseUri.$docSuffix;
            $resDocs = $gs->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
            if($resDocs->getBoolean()) {
                // The graph already contains triples: nothing to import.
                continue;
            }

            $doc = $this->loadDocument($identifier, $docRdfUrl, $maxLoadRetry);
            if($doc === null) {
                continue;
            }

            //TODO: treat errors
            $subjects = $doc->resources();
            if(empty($subjects)) {
                // reset() on an empty array returns false; calling getUri() on it would be fatal.
                $this->warn("\nNo resource found in $identifier ($docRdfUrl), skipping");
                Log::warning("No resource found in $identifier ($docRdfUrl), skipping");
                continue;
            }
            // The URI of the first resource of the document is used as the target graph.
            $subject = reset($subjects)->getUri();
            //TODO: exceptions ? but if pb on insert probably we have to fail anyway
            $gs->insert($doc, $subject);
        }
        $progressBar->setMessage("finished");
        $progressBar->finish();
    }

    /**
     * Load the RDF document at $docRdfUrl, retrying when the failure is a timeout.
     *
     * @param string $identifier OAI-PMH record identifier, used in log messages only
     * @param string $docRdfUrl  URL the RDF document is loaded from
     * @param int    $maxRetry   maximum number of load attempts
     * @return \EasyRdf\Graph|null the loaded graph, or null when a non-timeout
     *                             error occurred or every attempt timed out
     */
    private function loadDocument($identifier, $docRdfUrl, $maxRetry) {
        for($attempt = 1; $attempt <= $maxRetry; $attempt++) {
            try {
                $doc = new \EasyRdf\Graph($docRdfUrl);
                $doc->load();
                return $doc;
            }
            //TODO: catch network exception - add error to database
            catch(\Exception $e) {
                $code = $e->getCode();
                $errorMessage = $e->getMessage();
                // The base console command has no debug(); print the detail only at debug verbosity (-vvv).
                if($this->output->getVerbosity() >= \Symfony\Component\Console\Output\OutputInterface::VERBOSITY_DEBUG) {
                    $this->line("\nError processing $identifier. code : $code, message: $errorMessage");
                }
                Log::debug("Error processing $identifier. code : $code, message: $errorMessage");
                // stripos() returns false when the needle is absent; `>= 0` would accept that too.
                if($code == 1 && stripos($errorMessage, 'timed out') !== false) {
                    $this->warn("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
                    Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
                    continue;
                }
                $this->error("\nError processing $identifier ($docRdfUrl) : $e");
                Log::error("Error processing $identifier ($docRdfUrl) : $e");
                return null;
            }
        }
        return null;
    }
}