server/src/app/Http/Controllers/Sparql/SparqlClientController.php
author ymh <ymh.work@gmail.com>
Tue, 08 Nov 2016 18:23:01 +0100
changeset 405 f239c8c5bb94
parent 389 2204faa0b41a
child 531 48f5380c26d0
permissions -rw-r--r--
migrate to rdf4j (2.1.1) from sesame (4.1.1)

<?php

namespace CorpusParole\Http\Controllers\Sparql;

use Log;

use Illuminate\Http\Request;
use Illuminate\Pagination\LengthAwarePaginator;
use Illuminate\Pagination\Paginator;

use GuzzleHttp\Client;
use EasyRdf\Sparql\Result as SparqlResult;
use EasyRdf\Graph;
use EasyRdf\RdfNamespace;

use CorpusParole\Http\Controllers\Controller;
use CorpusParole\Libraries\Utils;

use CorpusParole\Libraries\Sparql\SparqlClient;
use CorpusParole\Libraries\Sparql\SparqlQueryAnalyser;
use RdfHelper;

class SparqlClientController extends Controller
{

    // Names of the incoming request headers that proxyQuery() copies onto the
    // request it sends to the query endpoint.
    const HEADERS_FORWARDED = [ "host", "user-agent", "accept", "accept-language", "accept-encoding", "connection" ];

    // Label => MIME type map handed to the result view as 'downloadFormats'
    // for SELECT queries (see querySelect()).
    const SELECT_RESULT_FORMAT = [
        "SPARQL/CSV" => "text/csv",
        "SPARQL/JSON" => "application/sparql-results+json",
        "SPARQL/XML" => "application/sparql-results+xml",
        "SPARQL/TSV" => "text/tab-separated-values",
        "BINARY" => "application/x-binary-rdf-results-table"
    ];

    // Label => MIME type map handed to the result view as 'downloadFormats'
    // for graph queries (see queryGraph()).
    const GRAPH_RESULT_FORMAT = [
        "N-Triples" => "application/n-triples",
        "RDF/XML" => "application/rdf+xml",
        "Turtle" => "text/turtle",
        "N3" => "text/n3",
        "RDF/JSON" => "application/rdf+json",
        "TriG" => "application/trig",
        "N-Quads" => "application/n-quads",
        "BinaryRDF" => "application/x-binary-rdf",
        "TriX" => "application/trix",
        "JSON-LD" => "application/ld+json"
    ];


    // Injected project SPARQL client. It is stored by the constructor but no
    // method of this class reads it; queries go through getSparqlClient().
    private $sparqlClient;
    // Guzzle HTTP client used by proxyQuery() to post the raw query.
    private $httpClient;

    /**
     * Keep references to the injected collaborators.
     *
     * @param SparqlClient $sparqlClient project SPARQL client
     * @param Client $httpClient Guzzle client used when proxying queries
     */
    public function __construct(SparqlClient $sparqlClient, Client $httpClient) {
        $this->httpClient = $httpClient;
        $this->sparqlClient = $sparqlClient;
    }

    /**
     * Build an EasyRdf SPARQL client for the configured query and update URLs.
     *
     * When the effective timeout is greater than zero it is appended to the
     * query URL as a `timeout` query-string parameter.
     *
     * @param mixed $timeout timeout value; null means "use
     *                       config('corpusparole.sparql_client_timeout')"
     * @return \EasyRdf\Sparql\Client
     */
    public function getSparqlClient($timeout = null) {
        if(is_null($timeout)) {
            $timeout = config('corpusparole.sparql_client_timeout');
        }
        $queryUrl = config('corpusparole.rdf4j_query_url');
        if($timeout > 0) {
            // parse_url() returns null when the URL has no query component and
            // false when it cannot be parsed; only a non-empty string means
            // there are already parameters to append to.
            $existingQuery = parse_url($queryUrl, PHP_URL_QUERY);
            $separator = (is_string($existingQuery) && $existingQuery !== '') ? "&" : "?";
            $queryUrl = $queryUrl . $separator . "timeout=$timeout";
        }
        return new \EasyRdf\Sparql\Client($queryUrl, config('corpusparole.rdf4j_update_url'));
    }

    /**
     * Render the SPARQL query form.
     */
    public function index() {
        $formView = 'sparql/sparqlClientForm';
        return view($formView);
    }

    /**
     * Convert result rows into arrays of display strings keyed by field name.
     *
     * For every row, each requested field is serialised with
     * RdfHelper::serialiseValue() when the row has it set; otherwise a
     * placeholder HTML span is used.
     *
     * @param mixed $docs   iterable of row objects
     * @param array $fields field names to extract from each row
     * @return array list of [field name => string] arrays, one per row
     */
    private function readDocs($docs, $fields) {
        $rows = [];
        foreach($docs as $doc) {
            $cells = [];
            foreach($fields as $name) {
                $cells[$name] = isset($doc->$name)
                    ? RdfHelper::serialiseValue($doc->$name)
                    : "<span class=\"corpus-rdf-blank-value\">&nbsp;</span>";
            }
            $rows[] = $cells;
        }
        return $rows;
    }

    /**
     * Throw an HttpException carrying the given status, message and cause.
     *
     * @param int $code HTTP status code
     * @param string $message message of the HTTP exception
     * @param \Exception $exception original exception, kept as "previous"
     * @throws \Symfony\Component\HttpKernel\Exception\HttpException always
     */
    private function abort($code, $message, $exception) {
        $httpError = new \Symfony\Component\HttpKernel\Exception\HttpException($code, $message, $exception, []);
        throw $httpError;
    }

    /**
     * Translate an exception raised while querying into an HTTP error.
     *
     * The status and message are chosen from the exception class and from
     * patterns found in its message, then handed to abort(), which throws.
     * NOTE(review): the \EasyRdf\Http\Exception test must stay before the
     * \EasyRdf\Exception one if the former is a subclass of the latter —
     * confirm against the EasyRdf sources.
     *
     * @param \Exception $exception exception caught around the query call
     */
    private function processQueryException($exception) {
        $details = $exception->getMessage();

        if($exception instanceof \EasyRdf\Http\Exception) {
            $isQueryError = preg_match("/SPARQL\squery/", $details);
            $status = $isQueryError ? 400 : 500;
            $reason = $isQueryError
                ? "La requête SPARQL n'est pas reconnue"
                : "Problème HTTP lors de la requête SPARQL";
        } elseif($exception instanceof \EasyRdf\Exception) {
            $isTimeout = preg_match("/timed\sout/i", $details);
            $status = $isTimeout ? 408 : 500;
            $reason = $isTimeout
                ? "Time-out causé par la requête SPARQL"
                : "Problème dans la requête SPARQL";
        } else {
            $status = 500;
            $reason = "Erreur serveur lors de la requête";
        }

        $this->abort($status, $reason, $exception);
    }

    /**
     * Run a SELECT query and prepare the data for the result-list view.
     *
     * If the requested page size is not positive, or the analyser reports
     * that the query already has its own LIMIT or OFFSET, the query is run
     * unchanged and every row is returned. Otherwise the analyser's count
     * query is run, the query is executed with a LIMIT/OFFSET for the current
     * page, and the rows are wrapped in a LengthAwarePaginator.
     *
     * @param Request $request current request; its 'limit' input is read
     * @param string $query SPARQL query text
     * @param SparqlQueryAnalyser $analyser analyser built for $query
     * @return array two-element array: view name, view data
     */
    private function querySelect(Request $request, $query, $analyser) {

        $limit = intval($request->input('limit', config('corpusparole.sparql_client_default_limit')));

        // A negative page size cannot be expressed as a SPARQL LIMIT; treat
        // it like 0 (no pagination) instead of sending a malformed query.
        if($limit <= 0 || !is_null($analyser->getLimit()) || !is_null($analyser->getOffset()) ) {
            try {
                $docs = $this->getSparqlClient()->query($query);
            } catch(\Exception $exception) {
                // always throws an HttpException
                $this->processQueryException($exception);
            }

            $fields = $docs->getFields();
            $results = $this->readDocs($docs, $fields);
            $count = count($results);

        } else {

            $pageParam = config('corpusparole.pagination_page_param');
            $page = Paginator::resolveCurrentPage($pageParam);
            assert(is_null($page) || is_numeric($page));

            $offset = max(0,($page - 1) * $limit);

            try {
                $client = $this->getSparqlClient();
                $countResult = $client->query($analyser->getCountQuery());
                // Start the clause on its own line so that a query ending
                // with a "#" comment does not swallow it.
                $docs = $client->query($query . "\nLIMIT $limit OFFSET $offset");
            } catch(\Exception $exception) {
                // always throws an HttpException
                $this->processQueryException($exception);
            }

            // Read the single value of the count query; fall back to 0 when
            // the result has no field or no row instead of dereferencing a
            // non-object.
            $count = 0;
            $countFields = $countResult->getFields();
            $countResult->rewind();
            if(!empty($countFields) && $countResult->valid()) {
                $countField = $countFields[0];
                $countRow = $countResult->current();
                if(isset($countRow->$countField)) {
                    $count = intval($countRow->$countField->getValue());
                }
            }

            $fields = $docs->getFields();

            $results = new LengthAwarePaginator($this->readDocs($docs, $fields), $count, $limit, $page, [
                'path' => Paginator::resolveCurrentPath(),
                'pageName' => $pageParam,
            ]);
        }

        // Map each prefix known to RdfHelper to the namespace registered
        // under the same prefix in EasyRdf.
        $registered = RdfNamespace::namespaces();
        $namespaces = [];
        foreach(array_keys(RdfHelper::getPrefixes()) as $prefix) {
            $namespaces[$prefix] = $registered[$prefix];
        }

        $data = [
            'query' => $query,
            'count' => $count,
            'fields' => $fields,
            'fieldPrefix' => "?",
            'results' => $results,
            'namespaces' => $namespaces,
            'downloadFormats' => self::SELECT_RESULT_FORMAT
        ];
        $view = 'sparql/sparqlClientResultList';
        return [$view, $data];
    }

    /**
     * Run a graph query and flatten the returned graph into
     * subject / predicate / object rows for the result-list view.
     *
     * @param Request $request current request (not read here)
     * @param string $query SPARQL query text
     * @param SparqlQueryAnalyser $analyser analyser built for $query (not read here)
     * @return array two-element array: view name, view data
     */
    private function queryGraph(Request $request, $query, $analyser) {
        try {
            $graph = $this->getSparqlClient()->query($query);
        } catch(\Exception $exception) {
            // always throws an HttpException
            $this->processQueryException($exception);
        }

        // One row per (resource, property, value) combination of the graph.
        $triples = [];
        foreach ($graph->resources() as $subject) {
            foreach ($subject->propertyUris() as $predicateUri) {
                $predicate = $graph->resource($predicateUri);
                foreach ($subject->all($predicate) as $object) {
                    $triples[] = [
                        'subject' => RdfHelper::serialiseValue($subject),
                        'predicate'=> RdfHelper::serialiseValue($predicate),
                        'object'=> RdfHelper::serialiseValue($object)
                    ];
                }
            }
        }

        // Map each prefix known to RdfHelper to its registered namespace.
        $namespaces = [];
        foreach (array_keys(RdfHelper::getPrefixes()) as $prefix) {
            $namespaces[$prefix] = RdfNamespace::namespaces()[$prefix];
        }

        return [
            'sparql/sparqlClientResultList',
            [
                'query' => $query,
                'count' => count($triples),
                'fields' => ["subject", "predicate", "object"],
                'fieldPrefix' => "",
                'results' => $triples,
                'namespaces' => $namespaces,
                'downloadFormats' => self::GRAPH_RESULT_FORMAT
            ]
        ];
    }

    /**
     * Run an ASK query and prepare the data for the boolean result view.
     *
     * @param Request $request current request (not read here)
     * @param string $query SPARQL query text
     * @param SparqlQueryAnalyser $analyser analyser built for $query; supplies the prefixes
     * @return array two-element array: view name, view data
     */
    private function queryAsk(Request $request, $query, $analyser) {
        try {
            $answer = $this->getSparqlClient()->query($query);
        } catch(\Exception $exception) {
            // always throws an HttpException
            $this->processQueryException($exception);
        }

        return [
            'sparql/sparqlClientResultBoolean',
            [
                'results' => $answer,
                'namespaces' => $analyser->getPrefixes()
            ]
        ];
    }

    /**
     * Execute the query from the 'query' request input and render it as HTML.
     *
     * The prefixes declared in the query are registered with EasyRdf first,
     * then the query is dispatched on its type (select, graph or ask). Any
     * other type ends the request with a 400 error.
     *
     * @param Request $request current request
     * @return mixed the rendered view
     */
    private function showHtml(Request $request) {

        $sparql = $request->input('query');
        $analyser = new SparqlQueryAnalyser($sparql);
        $kind = $analyser->getQueryType();

        // Make the query's own prefixes known before results are serialised.
        foreach($analyser->getPrefixes() as $prefix => $uri) {
            RdfNamespace::set($prefix, $uri);
        }

        if($kind === SparqlQueryAnalyser::SELECT_QUERY) {
            $rendering = $this->querySelect($request, $sparql, $analyser);
        } elseif($kind === SparqlQueryAnalyser::GRAPH_QUERY) {
            $rendering = $this->queryGraph($request, $sparql, $analyser);
        } elseif($kind === SparqlQueryAnalyser::ASK_QUERY) {
            $rendering = $this->queryAsk($request, $sparql, $analyser);
        } else {
            abort(400, "La requête n'est pas reconnue");
        }

        // $rendering is [view name, view data]
        return view($rendering[0], $rendering[1]);

    }


    /**
     * Forward the request to the configured query endpoint and relay the answer.
     *
     * The headers listed in HEADERS_FORWARDED are copied from the incoming
     * request, all request inputs plus the configured timeout are posted as
     * form parameters, and the endpoint's body, status and headers (except
     * Transfer-Encoding) are copied into the returned response. Failures of
     * the HTTP call are converted into HttpExceptions through abort().
     *
     * @param Request $request current request
     * @param string|null $format MIME type to request from the endpoint;
     *                            when empty the forwarded accept header, if
     *                            any, is left as is
     * @return mixed the response to send back to the client
     */
    private function proxyQuery(Request $request, $format=null) {
        $headers = [];
        foreach (self::HEADERS_FORWARDED as $h) {
            $headerValue = $request->header($h);
            if($headerValue) {
                $headers[$h] = $headerValue;
            }
        }

        if(!empty($format)){
            // Same lower-case key as in HEADERS_FORWARDED, so the explicit
            // format replaces the forwarded accept header instead of being
            // added next to it under a differently-cased key.
            $headers['accept'] = $format;
        }

        $queryParams = $request->all();
        $queryParams['timeout'] = config('corpusparole.sparql_client_timeout');
        $queryUrl = config('corpusparole.rdf4j_query_url');

        try {
            $rdf4jResp = $this->httpClient->post($queryUrl, ['form_params' => $queryParams, 'headers' => $headers]);
        } catch(\GuzzleHttp\Exception\ServerException $exception) {
            if(intval($exception->getCode()) === 503) {
                $this->abort(408, "Time-out causé par la requête SPARQL", $exception);
            } else {
                $this->abort(500, "Problème lors de la requête SPARQL", $exception);
            }

        } catch(\GuzzleHttp\Exception\RequestException $exception) {
            $message = $exception->getMessage();
            if(preg_match("/MALFORMED\sQUERY/i", $message)) {
                $abortMessage = "Requête SPARQL mal-formée";
            } else {
                $abortMessage = "Problème lors de la requête SPARQL";
            }
            // The exception code is only usable as an HTTP status when it is
            // an error status; it may be 0 when there is no response to take
            // a status from. Anything else is reported as a plain 500.
            $status = intval($exception->getCode());
            if($status < 400 || $status > 599) {
                $status = 500;
            }
            $this->abort($status, $abortMessage, $exception);
        } catch(\Exception $exception) {
            $this->abort(500, "Erreur serveur lors de la requête", $exception);
        }

        $resp = response((string)$rdf4jResp->getBody(), $rdf4jResp->getStatusCode());
        foreach ($rdf4jResp->getHeaders() as $name => $values) {
            // Header names are case-insensitive; the body was read in full
            // above, so the upstream Transfer-Encoding must not be relayed.
            if(strcasecmp($name, 'Transfer-Encoding') !== 0) {
                $resp->header($name, $values);
            }
        }

        return $resp;
    }

    /**
     * Display the result of a query.
     *
     * When the 'format' request input is exactly 'text/html' the result is
     * rendered as an HTML page; for any other value (or none) the query is
     * proxied to the endpoint with that format.
     *
     * @param Request $request current request
     * @return mixed the response to send back to the client
     */
    public function show(Request $request) {

        $format = $request->input('format');

        if($format !== 'text/html') {
            return $this->proxyQuery($request, $format);
        }

        return $this->showHtml($request);

    }

    /**
     * Execute the query by proxying the request to the endpoint, without
     * forcing a result format.
     *
     * @param Request $request current request
     * @return mixed the response to send back to the client
     */
    public function query(Request $request) {
        $proxied = $this->proxyQuery($request);
        return $proxied;
    }

}