1 <?php |
1 <?php |
2 |
2 |
3 namespace CorpusParole\Console\Commands; |
3 namespace CorpusParole\Console\Commands; |
4 |
4 |
5 use Config; |
5 use Config; |
|
6 use Log; |
6 use Illuminate\Console\Command; |
7 use Illuminate\Console\Command; |
7 use Symfony\Component\Console\Input\InputOption; |
8 use Symfony\Component\Console\Input\InputOption; |
8 use Symfony\Component\Console\Input\InputArgument; |
9 use Symfony\Component\Console\Input\InputArgument; |
9 use Phpoaipmh\Client; |
10 use Phpoaipmh\Client; |
10 use Phpoaipmh\Endpoint; |
11 use Phpoaipmh\Endpoint; |
11 |
12 |
12 class ImportCocoonRDF extends Command |
13 class ImportCocoonRDF extends Command { |
13 { |
|
14 /** |
|
15 * The console command name. |
|
16 * |
|
17 * @var string |
|
18 */ |
|
19 protected $name = 'corpus-parole:importRDF'; |
|
20 |
14 |
21 /** |
15 /** |
22 * The console command description. |
16 * The console command description. |
23 * |
17 * |
24 * @var string |
18 * @var string |
25 */ |
19 */ |
26 protected $description = 'Command description.'; |
20 protected $description = 'Import Rdf from Cocoon.'; |
|
21 |
|
22 /** |
|
23 * The name and signature of the console command. |
|
24 * |
|
25 * @var string |
|
26 */ |
|
27 protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip}'; |
27 |
28 |
28 /** |
29 /** |
29 * Create a new command instance. |
30 * Create a new command instance. |
30 */ |
31 */ |
31 public function __construct() |
32 public function __construct() { |
32 { |
|
33 parent::__construct(); |
33 parent::__construct(); |
34 } |
34 } |
35 |
35 |
36 /** |
36 /** |
37 * Execute the console command. |
37 * Execute the console command. |
38 * |
38 * |
39 * @return mixed |
39 * @return mixed |
40 */ |
40 */ |
41 public function fire() |
41 public function fire() { |
42 { |
42 |
43 echo("hello\n"); |
|
44 libxml_use_internal_errors(true); |
43 libxml_use_internal_errors(true); |
45 |
44 |
46 //$gs = new \EasyRdf_GraphStore(Config::get('corpusparole.sesame_update_url')); |
45 $skip = (int)$this->option('skip'); |
47 $gs = new \EasyRdf_Sparql_Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url')); |
|
48 |
46 |
49 //$doc = new \EasyRdf_Graph("http://cocoon.huma-num.fr/exist/crdo/rdf/crdo-ESLO1_ENTCONT_203"); |
47 $this->comment("Skipping $skip records"); |
50 //$doc->load(); |
48 |
|
49 $gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url')); |
|
50 |
51 |
51 |
52 $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url')); |
52 $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url')); |
53 //$client = new Client('http://memory.loc.gov/cgi-bin/oai2_0'); |
|
54 $endpoint = new Endpoint($client); |
53 $endpoint = new Endpoint($client); |
55 |
54 |
56 $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance'); |
55 $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance'); |
57 //$recs = $endpoint->listRecords('oai_dc', null, null, 'mussm'); |
56 |
|
57 //TODO : treat timeout exceptions |
|
58 $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection()); |
|
59 $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%'); |
58 |
60 |
59 foreach ($recs as $item) { |
61 foreach ($recs as $item) { |
60 if ($recs->getNumRequests() > 1) { |
62 |
61 break; |
|
62 } |
|
63 $identifier = (string) $item->xpath('/record/header/identifier')[0]; |
63 $identifier = (string) $item->xpath('/record/header/identifier')[0]; |
64 $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base'))); |
64 $docRdfUrl = Config::get('corpusparole.cocoon_rdf_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base'))); |
65 print("Processing $identifier : $docRdfUrl\n"); |
65 $message = "$identifier : $docRdfUrl"; |
66 $doc = new \EasyRdf_Graph($docRdfUrl); |
66 if($recs->getNumRetrieved() <= $skip) { |
67 $doc->load(); |
67 $progressBar->setMessage("$message - Skipping"); |
68 $subjects = $doc->resourcesMatching('foaf:primaryTopic'); |
68 $progressBar->advance(); |
69 $subject = reset($subjects)->getUri(); |
69 continue; |
70 $gs->insert($doc, $subject); |
70 } |
|
71 $progressBar->setMessage($message); |
|
72 $progressBar->advance(); |
|
73 |
|
74 $docUri = config('corpusparole.cocoon_doc_id_base_uri').substr($identifier, strlen(Config::get('corpusparole.cocoon_doc_id_base'))); |
|
75 $resDocs = $gs->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}"); |
|
76 if(!$resDocs->getBoolean()) { |
|
77 $docLoaded = false; |
|
78 $loadRetry = 0; |
|
79 while(!$docLoaded && $loadRetry < config('corpusparole.max_load_retry', 3)) { |
|
80 $loadRetry++; |
|
81 try { |
|
82 $doc = new \EasyRdf\Graph($docRdfUrl); |
|
83 $doc->load(); |
|
84 $docLoaded = true; |
|
85 } |
|
86 //TODO: catch network exception - add error to database |
|
87 catch(\Exception $e) { |
|
88 $code = $e->getCode(); |
|
89 $message = $e->getMessage(); |
|
90 $this->debug("\nError processing $identifier. code : $code, message: $message"); |
|
91 Log::debug("Error processing $identifier. code : $code, message: $message"); |
|
92 if($code == 1 && stripos($message, 'timed out')>=0 ) { |
|
93 $this->warning("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
|
94 Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying"); |
|
95 continue; |
|
96 } |
|
97 else { |
|
98 $this->error("\nError processing $identifier ($docRdfUrl) : $e"); |
|
99 Log::error("Error processing $identifier ($docRdfUrl) : $e"); |
|
100 break; |
|
101 } |
|
102 //$this->error(print_r($e->getTraceAsString(),true)); |
|
103 } |
|
104 } |
|
105 if(!$docLoaded) { |
|
106 continue; |
|
107 } |
|
108 //TODO: treat errors |
|
109 $subjects = $doc->resources(); |
|
110 $subject = reset($subjects)->getUri(); |
|
111 //TODO: exceptions ? but if pb on insert probably we have to fail anyway |
|
112 $gs->insert($doc, $subject); |
|
113 } |
71 } |
114 } |
72 } |
115 $progressBar->setMessage("finished"); |
73 |
116 $progressBar->finish(); |
74 /** |
|
75 * Get the console command arguments. |
|
76 * |
|
77 * @return array |
|
78 */ |
|
79 protected function getArguments() |
|
80 { |
|
81 return [ |
|
82 ['example', InputArgument::REQUIRED, 'An example argument.'], |
|
83 ]; |
|
84 } |
|
85 |
|
86 /** |
|
87 * Get the console command options. |
|
88 * |
|
89 * @return array |
|
90 */ |
|
91 protected function getOptions() |
|
92 { |
|
93 return [ |
|
94 //['example', null, InputOption::VALUE_OPTIONAL, 'An example option.', null], |
|
95 ]; |
|
96 } |
117 } |
97 } |
118 } |