--- a/Utils/WikiTagUtils.php Wed Jan 29 12:16:16 2014 +0100
+++ b/Utils/WikiTagUtils.php Thu Jan 30 17:52:14 2014 +0100
@@ -190,7 +190,7 @@
* @param string $url
* @return object (json decoded)
*/
- private static function curlRequest($url)
+ private static function curlRequest($url, $throw_error=true)
{
$ch = curl_init();
curl_setopt($ch, CURLOPT_URL, $url);
@@ -219,7 +219,7 @@
$curl_error = curl_error($ch);
curl_close($ch);
- if ($curl_errno > 0) {
+ if ($curl_errno > 0 && $throw_error) {
throw new \Exception("$url\n request failed. cURLError #$curl_errno: $curl_error\n", $curl_errno, null);
}
@@ -281,16 +281,19 @@
/**
* Builds DbPedia URI
*/
- private static function getDbpediaUri($label, $params=[])
+ public static function getDbpediaUri($label, $params=[], $throw_error=true)
{
// Get lang from url
$dbp_url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"];
$lang = substr($dbp_url, 7, 2);
+ // filter with regexp to avoid results with "category:LABEL" or other "abc:LABEL"
+ //"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' }',
+ //"query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }',
$params = [
- "query" => 'select distinct * where {?s rdfs:label "'.$label.'"@'.$lang.'}',
+ "query" => 'select distinct * where { ?s rdfs:label "'.$label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }',
"format" => 'application/json',
];
-
+
$params_str = '';
foreach ($params as $key => $value) {
if ($params_str==''){
@@ -303,15 +306,30 @@
$url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"].'?'.$params_str;
- $res = WikiTagUtils::curlRequest($url);
+ $res = WikiTagUtils::curlRequest($url, $throw_error);
$val = json_decode($res, true);
$uri = "";
- if(array_key_exists("results", $val)){
- if(array_key_exists("bindings", $val["results"])){
- if(count($val["results"]["bindings"]) > 0){
- $uri = $val["results"]["bindings"][0]["s"]["value"];
- }
- }
+ if($val){
+ if(array_key_exists("results", $val)){
+ if(array_key_exists("bindings", $val["results"])){
+ $len = count($val["results"]["bindings"]);
+ if($len > 0){
+ $uri = $val["results"]["bindings"][0]["s"]["value"];
+ if($len>1){
+ // If there are several results, we compare each result's "URL label" (the part of its URI after the last "/") with the requested label.
+ // Why? Because, for example, "1000" returns both "Category:1000" and "1000" as results.
+ // We keep this check to be safe, although the regexp filter in the SPARQL request should normally prevent this problem.
+ for($i=0;$i<$len;$i++){
+ $res_uri = $val["results"]["bindings"][$i]["s"]["value"];
+ $url_label = substr( $res_uri, strrpos( $res_uri, '/' )+1 );
+ if(str_replace(" ", "_", $label) == $url_label){
+ $uri = $res_uri;
+ }
+ }
+ }
+ }
+ }
+ }
}
return $uri;
}