server/src/app/Libraries/Sparql/SparqlQueryAnalyser.php
changeset 386 c731ab9b934d
child 387 7fba86fa8604
--- /dev/null	Thu Jan 01 00:00:00 1970 +0000
+++ b/server/src/app/Libraries/Sparql/SparqlQueryAnalyser.php	Mon Oct 31 14:24:23 2016 +0100
@@ -0,0 +1,126 @@
+<?php
+namespace CorpusParole\Libraries\Sparql;
+
+use CorpusParole\Libraries\Utils;
+
+/**
+ * Lightweight, regexp-based inspection of a SPARQL query string.
+ *
+ * The analyser never parses the query properly: it only recognises the
+ * leading PREFIX/BASE declarations, the query form keyword that follows them
+ * and a trailing LIMIT/OFFSET clause. Every derived value is computed lazily
+ * on first access and cached; `false` is used internally as the
+ * "not computed yet" marker.
+ */
+class SparqlQueryAnalyser {
+
+    /**
+     * Matches one PREFIX or BASE declaration followed by optional whitespace.
+     *
+     * Capture groups: 1 = the whole declaration up to the closing '>',
+     * 2 = the 'prefix name:' part or the 'base' keyword, 3 = the prefix name
+     * (empty for BASE), 4 = the IRI found between '<' and '>'.
+     * The IRI part only accepts http, https or ftp URLs and rejects hosts
+     * written as private/loopback/link-local IPv4 addresses.
+     */
+    const SPARQL_PREFIX_BASE_REGEXP = '(((?:prefix\s+([\p{L}-\d]+)\s*\:)|base)\s*\<((?:(?:https?|ftp)://)(?:\S+(?::\S*)?@)?(?:(?!10(?:\.\d{1,3}){3})(?!127(?:\.\d{1,3}){3})(?!169\.254(?:\.\d{1,3}){2})(?!192\.168(?:\.\d{1,3}){2})(?!172\.(?:1[6-9]|2\d|3[0-1])(?:\.\d{1,3}){2})(?:[1-9]\d?|1\d\d|2[01]\d|22[0-3])(?:\.(?:1?\d{1,2}|2[0-4]\d|25[0-5])){2}(?:\.(?:[1-9]\d?|1\d\d|2[0-4]\d|25[0-4]))|(?:(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)(?:\.(?:[a-z\x{00a1}-\x{ffff}0-9]+-?)*[a-z\x{00a1}-\x{ffff}0-9]+)*(?:\.(?:[a-z\x{00a1}-\x{ffff}]{2,})))(?::\d{2,5})?(?:/[^\s]*)?)\>)\s*';
+
+    /** Optional whitespace, any number of PREFIX/BASE declarations, then the SELECT keyword. */
+    const SPARQL_SELECT_QUERY_REGEXP = '^(?:\s*(?:'.self::SPARQL_PREFIX_BASE_REGEXP.')*select)';
+    /** Same prologue as above, followed by the ASK keyword. */
+    const SPARQL_ASK_QUERY_REGEXP = '^(?:\s*(?:'.self::SPARQL_PREFIX_BASE_REGEXP.')*ask)';
+    /** Same prologue as above, followed by CONSTRUCT or DESCRIBE. */
+    const SPARQL_GRAPH_QUERY_REGEXP = '^(?:\s*(?:'.self::SPARQL_PREFIX_BASE_REGEXP.')*(?:(?:construct)|(?:describe)))';
+
+    /**
+     * Matches a run of LIMIT / OFFSET clauses (in any order) at the very end
+     * of the query. Capture groups: 2 = LIMIT value, 4 = OFFSET value.
+     */
+    const SPARQL_LIMIT_OFFSET_QUERY_REGEXP = '(?:(?:(limit\s+(\d+))|(offset\s+(\d+)))\s*)+\s*$';
+
+    // Values returned by getQueryType(). They must all be distinct so that
+    // callers can tell the query forms apart.
+    const UNKNOWN_QUERY = 0;
+    const SELECT_QUERY = 1;
+    const GRAPH_QUERY = 2;
+    // Was 2, i.e. identical to GRAPH_QUERY, which made ASK queries
+    // indistinguishable from CONSTRUCT/DESCRIBE queries.
+    const ASK_QUERY = 3;
+
+    private $query;
+    private $queryType = false;
+    private $rawPrefixes = false;
+    private $prefixes = false;
+    private $limit = false;
+    private $offset = false;
+    private $rawQuery = false;
+    private $countVar = false;
+
+    /**
+     * @param string $query the SPARQL query text to analyse
+     */
+    public function __construct($query) {
+        $this->query = $query;
+    }
+
+    /**
+     * Classifies the query by the keyword following its PREFIX/BASE prologue.
+     *
+     * @return int one of SELECT_QUERY, GRAPH_QUERY, ASK_QUERY or UNKNOWN_QUERY
+     */
+    public function getQueryType() {
+
+        if($this->queryType === false) {
+            if(preg_match("%".self::SPARQL_SELECT_QUERY_REGEXP."%iu", $this->query) === 1) {
+                $this->queryType = self::SELECT_QUERY;
+            } elseif(preg_match("%".self::SPARQL_GRAPH_QUERY_REGEXP."%iu", $this->query) === 1) {
+                $this->queryType = self::GRAPH_QUERY;
+            } elseif(preg_match("%".self::SPARQL_ASK_QUERY_REGEXP."%iu", $this->query) === 1) {
+                $this->queryType = self::ASK_QUERY;
+            } else {
+                $this->queryType = self::UNKNOWN_QUERY;
+            }
+        }
+        return $this->queryType;
+    }
+
+    /**
+     * Strips every PREFIX/BASE declaration out of the query and collects them.
+     *
+     * @return array a triple: [list of trimmed declaration strings,
+     *               map of prefix name => IRI, trimmed query without declarations]
+     */
+    private function extractPrefix() {
+        $prefixes = [];
+        $rawPrefixes = [];
+        $res = preg_replace_callback("%".self::SPARQL_PREFIX_BASE_REGEXP."%iu", function($m) use (&$prefixes, &$rawPrefixes) {
+            $rawPrefixes[] = trim($m[0]);
+            // A BASE declaration has no prefix name: it is stored under the empty key.
+            $prefixes[$m[3]?$m[3]:""] = $m[4];
+            return "";
+        }, $this->query);
+
+        return [$rawPrefixes, $prefixes, trim($res)];
+    }
+
+    /**
+     * @return array the PREFIX/BASE declarations exactly as written in the query (trimmed)
+     */
+    public function getRawPrefixes() {
+        if($this->rawPrefixes === false) {
+            list($this->rawPrefixes, $this->prefixes, $this->rawQuery) = $this->extractPrefix();
+        }
+        return $this->rawPrefixes;
+    }
+
+    /**
+     * @return array map of prefix name => IRI; the BASE IRI, if any, is under the "" key
+     */
+    public function getPrefixes() {
+        if($this->prefixes === false) {
+            list($this->rawPrefixes, $this->prefixes, $this->rawQuery) = $this->extractPrefix();
+        }
+        return $this->prefixes;
+    }
+
+    /**
+     * @return string the query with all PREFIX/BASE declarations removed, trimmed
+     */
+    public function getRawQuery() {
+        if($this->rawQuery === false) {
+            list($this->rawPrefixes, $this->prefixes, $this->rawQuery) = $this->extractPrefix();
+        }
+        return $this->rawQuery;
+    }
+
+    /**
+     * Name of the variable used by getCountQuery() to hold the row count.
+     * It embeds the md5 of the original query text.
+     *
+     * @return string a SPARQL variable name, leading '?' included
+     */
+    public function getCountVar() {
+        if($this->countVar === false) {
+            $this->countVar = "?count_cp_".hash('md5', $this->query);
+        }
+        return $this->countVar;
+    }
+
+    /**
+     * Builds a query counting the rows of the analysed query: the original
+     * declarations, then a `select (count(*) as ...)` wrapping the
+     * declaration-free query as a sub-select.
+     *
+     * @return string
+     */
+    public function getCountQuery() {
+        return implode(" ", $this->getRawPrefixes())." select (count(*) as ".$this->getCountVar().") { ".$this->getRawQuery()." }";
+    }
+
+    /**
+     * Reads the trailing LIMIT/OFFSET clauses and caches both values.
+     * A clause that is absent is recorded as null.
+     */
+    private function setLimitOffset() {
+        $limit = null;
+        $offset = null;
+        if(preg_match("%".self::SPARQL_LIMIT_OFFSET_QUERY_REGEXP."%iu", $this->query, $m) === 1) {
+            // Group numbers are fixed by the pattern: 2 holds the LIMIT value,
+            // 4 the OFFSET value. preg_match() drops trailing unmatched groups
+            // and leaves earlier unmatched ones as "", hence the double check.
+            if(isset($m[2]) && $m[2] !== '') {
+                $limit = intval($m[2]);
+            }
+            if(isset($m[4]) && $m[4] !== '') {
+                $offset = intval($m[4]);
+            }
+        }
+        $this->limit = $limit;
+        $this->offset = $offset;
+    }
+
+    /**
+     * @return int|null the value of the trailing LIMIT clause, null when there is none
+     */
+    public function getLimit() {
+        if($this->limit === false) {
+            $this->setLimitOffset();
+        }
+        return $this->limit;
+    }
+
+    /**
+     * @return int|null the value of the trailing OFFSET clause, null when there is none
+     */
+    public function getOffset() {
+        if($this->offset === false) {
+            $this->setLimitOffset();
+        }
+        return $this->offset;
+    }
+
+}