Utils/WikiTagUtils.php
changeset 116 a023e0185a02
parent 115 085ea4dbfeee
child 117 5771052a647a
--- a/Utils/WikiTagUtils.php	Wed Jan 29 12:16:16 2014 +0100
+++ b/Utils/WikiTagUtils.php	Thu Jan 30 17:52:14 2014 +0100
@@ -190,7 +190,7 @@
      * @param string $url
      * @return object (json decoded)
      */
-    private static function curlRequest($url)
+    private static function curlRequest($url, $throw_error=true)
     {
     	$ch = curl_init();
     	curl_setopt($ch, CURLOPT_URL, $url);
@@ -219,7 +219,7 @@
     	$curl_error = curl_error($ch);
     	curl_close($ch);
     
-    	if ($curl_errno > 0) {
+    	if ($curl_errno > 0 && $throw_error) {
     		throw new \Exception("$url\n request failed. cURLError #$curl_errno: $curl_error\n", $curl_errno, null);
     	}
     	
@@ -281,16 +281,19 @@
     /**
      * Builds DbPedia URI
      */
-    private static function getDbpediaUri($label, $params=[])
+    public static function getDbpediaUri($label, $params=[], $throw_error=true)
     {
     	// Get lang from url
     	$dbp_url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"];
     	$lang = substr($dbp_url, 7, 2);
+    	// filter with regexp to avoid results with "category:LABEL" or other "abc:LABEL"
+    	//"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' }',
+    	//"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }',
     	$params = [
-    		"query" => 'select distinct * where {?s rdfs:label "'.$label.'"@'.$lang.'}',
+    		"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }',
     		"format" => 'application/json',
     	];
-
+    	
     	$params_str = '';
     	foreach ($params as $key => $value) {
     		if ($params_str==''){
@@ -303,15 +306,30 @@
     	
     	$url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"].'?'.$params_str;
     	
-    	$res = WikiTagUtils::curlRequest($url);
+    	$res = WikiTagUtils::curlRequest($url, $throw_error);
     	$val = json_decode($res, true);
     	$uri = "";
-    	if(array_key_exists("results", $val)){
-    		if(array_key_exists("bindings", $val["results"])){
-    			if(count($val["results"]["bindings"]) > 0){
-    				$uri = $val["results"]["bindings"][0]["s"]["value"];
-    			}
-    		}
+    	if($val){
+	    	if(array_key_exists("results", $val)){
+	    		if(array_key_exists("bindings", $val["results"])){
+	    			$len = count($val["results"]["bindings"]);
+	    			if($len > 0){
+	    				$uri = $val["results"]["bindings"][0]["s"]["value"];
+	    				if($len>1){
+	    					// If there are several results, we compare the last path segment of each URI with the label.
+	    					// Why? Because, for example, "1000" returns both "Category:1000" and "1000" as results.
+	    					// This check is kept as a safeguard, although the regexp filter in the SPARQL request normally prevents the problem.
+	    					for($i=0;$i<$len;$i++){
+	    						$res_uri = $val["results"]["bindings"][$i]["s"]["value"];
+	    						$url_label = substr( $res_uri, strrpos( $res_uri, '/' )+1 );
+	    						if(str_replace(" ", "_", $label) == $url_label){
+	    							$uri = $res_uri;
+	    						}
+	    					}
+	    				}
+	    			}
+	    		}
+	    	}
     	}
     	return $uri;
     }