<?php

namespace CorpusParole\Console\Commands;

use Config;
use Log;
use Illuminate\Console\Command;
use Symfony\Component\Console\Input\InputOption;
use Symfony\Component\Console\Input\InputArgument;
use Phpoaipmh\Client;
use Phpoaipmh\Endpoint;

class ImportCocoonRDF extends Command {

    /**
     * Threshold compared against the running count of insert timeouts:
     * a timeout on insert is tolerated (the record is skipped) as long as
     * the number of timeouts already seen is at most this value.
     */
    const INSERT_TIMEOUT_RETRY = 5;

    /**
     * The console command description.
     *
     * @var string
     */
    protected $description = 'Import Rdf from Cocoon.';

    /**
    * The name and signature of the console command.
    *
    * @var string
    */
    protected $signature = 'corpus-parole:importRDF {--skip=0 : Number of record to skip}';

    /**
     * Create a new command instance.
     */
    public function __construct() {
        parent::__construct();
    }

    /**
     * Execute the console command.
     *
     * Lists the OAI-PMH records of the 'LanguesDeFrance' set (metadata
     * prefix 'olac'), and for every record past the first --skip ones whose
     * named graph is not yet present in the SPARQL store, loads the RDF
     * document and inserts it into the store.
     *
     * @return void
     *
     * @throws \Exception when an insert fails for a reason other than a
     *                    tolerated timeout
     */
    public function fire() {

        // Collect libxml errors internally instead of emitting PHP warnings.
        libxml_use_internal_errors(true);

        $skip = (int)$this->option('skip');

        $this->comment("Skipping $skip records");

        $gs = new \EasyRdf\Sparql\Client(Config::get('corpusparole.sesame_query_url'), Config::get('corpusparole.sesame_update_url'));

        $client = new Client(Config::get('corpusparole.cocoon_oaipmh_url'));
        $endpoint = new Endpoint($client);

        $recs = $endpoint->listRecords('olac', null, null, 'LanguesDeFrance');

        //TODO : treat timeout exceptions
        $progressBar = $this->output->createProgressBar($recs->getTotalRecordsInCollection());
        $progressBar->setFormat(' %current%/%max% [%bar%] %percent:3s%% - %message%');

        // Loop-invariant configuration values, read once.
        $rdfBaseUri = Config::get('corpusparole.cocoon_rdf_base_uri');
        $docBaseUri = config('corpusparole.cocoon_doc_id_base_uri');
        $idPrefixLength = strlen(Config::get('corpusparole.cocoon_doc_id_base'));

        $insertTimeouts = 0;

        foreach ($recs as $item) {

            $identifier = (string) $item->xpath('/record/header/identifier')[0];
            // Part of the identifier that follows the configured id prefix.
            $localId = substr($identifier, $idPrefixLength);
            $docRdfUrl = $rdfBaseUri.$localId;
            $message = "$identifier : $docRdfUrl";

            if($recs->getNumRetrieved() <= $skip) {
                $progressBar->setMessage("$message - Skipping");
                $progressBar->advance();
                continue;
            }
            $progressBar->setMessage($message);
            $progressBar->advance();

            $docUri = $docBaseUri.$localId;

            // Only import documents whose graph holds no triple yet.
            $resDocs = $gs->query("ASK WHERE { GRAPH <$docUri> { ?s ?p ?o }}");
            if($resDocs->getBoolean()) {
                continue;
            }

            $doc = $this->loadDocument($identifier, $docRdfUrl);
            if($doc === null) {
                continue;
            }

            //TODO: treat errors
            $subjects = $doc->resources();
            if(empty($subjects)) {
                // reset() would return false here and ->getUri() would be fatal.
                $this->error("\nNo resource found in $identifier ($docRdfUrl), skipping");
                Log::error("No resource found in $identifier ($docRdfUrl), skipping");
                continue;
            }
            // The URI of the first resource is used as the target graph URI.
            $subject = reset($subjects)->getUri();

            try {
                $gs->insert($doc, $subject);
            }
            catch(\Exception $e) {
                // just log not much we can do here...
                $this->error("\nError on insert $identifier ($docRdfUrl) : $e");
                Log::error("Error on insert $identifier ($docRdfUrl) : $e");

                $tolerated = $e instanceof \EasyRdf\Exception
                    && $this->isTimeoutMessage($e->getMessage())
                    && $insertTimeouts <= self::INSERT_TIMEOUT_RETRY;

                if($tolerated) {
                    $this->info("\nThis is a timeout, we continue.");
                    Log::info("This is a timeout, we continue.");
                    $insertTimeouts++;
                    continue;
                }
                throw $e;
            }
        }
        $progressBar->setMessage("finished");
        $progressBar->finish();
    }

    /**
     * Load the RDF document at the given URL, retrying on timeouts.
     *
     * At most 'corpusparole.max_load_retry' attempts (default 3) are made.
     * An error is retried only when its code is 1 and its message contains
     * 'timed out'; any other error aborts the load immediately.
     * NOTE(review): the meaning of exception code 1 is not visible from this
     * file - confirm against the HTTP client used by EasyRdf.
     *
     * @param string $identifier record identifier, used in log messages only
     * @param string $docRdfUrl  URL of the RDF document to load
     *
     * @return \EasyRdf\Graph|null the loaded graph, or null when loading failed
     */
    private function loadDocument($identifier, $docRdfUrl) {
        $maxAttempts = config('corpusparole.max_load_retry', 3);

        for($attempt = 0; $attempt < $maxAttempts; $attempt++) {
            try {
                $doc = new \EasyRdf\Graph($docRdfUrl);
                $doc->load();
                return $doc;
            }
            //TODO: catch network exception - add error to database
            catch(\Exception $e) {
                $code = $e->getCode();
                $errorMessage = $e->getMessage();
                $this->line("\nError processing $identifier. code : $code, message: $errorMessage");
                Log::debug("Error processing $identifier. code : $code, message: $errorMessage");

                if($code == 1 && $this->isTimeoutMessage($errorMessage)) {
                    $this->warn("\nTimeout error processing $identifier ($docRdfUrl) : $e, retrying");
                    Log::warning("Timeout error processing $identifier ($docRdfUrl) : $e, retrying");
                    continue;
                }

                $this->error("\nError processing $identifier ($docRdfUrl) : $e");
                Log::error("Error processing $identifier ($docRdfUrl) : $e");
                return null;
            }
        }

        return null;
    }

    /**
     * Tell whether an error message contains 'timed out' (case-insensitive).
     *
     * @param string $message error message to inspect
     *
     * @return bool
     */
    private function isTimeoutMessage($message) {
        // stripos() returns false when absent and 0 for a match at offset 0,
        // so the result must be compared strictly against false.
        return stripos($message, 'timed out') !== false;
    }
}
