diff -r 085ea4dbfeee -r a023e0185a02 Utils/WikiTagUtils.php --- a/Utils/WikiTagUtils.php Wed Jan 29 12:16:16 2014 +0100 +++ b/Utils/WikiTagUtils.php Thu Jan 30 17:52:14 2014 +0100 @@ -190,7 +190,7 @@ * @param string $url * @return object (json decoded) */ - private static function curlRequest($url) + private static function curlRequest($url, $throw_error=true) { $ch = curl_init(); curl_setopt($ch, CURLOPT_URL, $url); @@ -219,7 +219,7 @@ $curl_error = curl_error($ch); curl_close($ch); - if ($curl_errno > 0) { + if ($curl_errno > 0 && $throw_error) { throw new \Exception("$url\n request failed. cURLError #$curl_errno: $curl_error\n", $curl_errno, null); } @@ -281,16 +281,19 @@ /** * Builds DbPedia URI */ - private static function getDbpediaUri($label, $params=[]) + public static function getDbpediaUri($label, $params=[], $throw_error=true) { // Get lang from url $dbp_url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"]; $lang = substr($dbp_url, 7, 2); + // filter with regexp to avoid results with "category:LABEL" or other "abc:LABEL" + //"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' }', + //"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }', $params = [ - "query" => 'select distinct * where {?s rdfs:label "'.$label.'"@'.$lang.'}', + "query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }', "format" => 'application/json', ]; - + $params_str = ''; foreach ($params as $key => $value) { if ($params_str==''){ @@ -303,15 +306,30 @@ $url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"].'?'.$params_str; - $res = WikiTagUtils::curlRequest($url); + $res = WikiTagUtils::curlRequest($url, $throw_error); $val = json_decode($res, true); $uri = ""; - if(array_key_exists("results", $val)){ - if(array_key_exists("bindings", $val["results"])){ - if(count($val["results"]["bindings"]) > 0){ - $uri = $val["results"]["bindings"][0]["s"]["value"]; - } - } + if($val){ + if(array_key_exists("results", $val)){ + if(array_key_exists("bindings", $val["results"])){ + $len = count($val["results"]["bindings"]); + if($len > 0){ + $uri = $val["results"]["bindings"][0]["s"]["value"]; + if($len>1){ + // If there are several results, we test the "url label" to see if it matches the label. + // Why ? Because, for example "1000" gets "Category:1000" and "1000" as result. + // We keep this code to be safe but the regexp in the sparql request normally avoids this problem. + for($i=0;$i<$len;$i++){ + $res_uri = $val["results"]["bindings"][$i]["s"]["value"]; + $url_label = substr( $res_uri, strrpos( $res_uri, '/' )+1 ); + if(str_replace(" ", "_", $label) == $url_label){ + $uri = $res_uri; + } + } + } + } + } + } } return $uri; }