Utils/WikiTagUtils.php
changeset 115 085ea4dbfeee
parent 112 14653baf4f6b
child 116 a023e0185a02
equal deleted inserted replaced
114:ed31d6008d31 115:085ea4dbfeee
   143             $alternative_pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
   143             $alternative_pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
   144         }
   144         }
   145         
   145         
   146         $revision_id = $page['lastrevid'];
   146         $revision_id = $page['lastrevid'];
   147         
   147         
       
   148         // Get the DBpedia URI by querying DBpedia with SPARQL
       
   149      	
       
   150         
       
   151         /*
   148         // process language to extract the english label
   152         // process language to extract the english label
   149         $english_label = null;
   153         $english_label = null;
   150         if($status==Tag::$TAG_URL_STATUS_DICT["match"] || $status==Tag::$TAG_URL_STATUS_DICT["redirection"]){
   154         if($status==Tag::$TAG_URL_STATUS_DICT["match"] || $status==Tag::$TAG_URL_STATUS_DICT["redirection"]){
   151             if(array_key_exists("langlinks", $page)){
   155             if(array_key_exists("langlinks", $page)){
   152                 foreach ($page["langlinks"] as $ar) {
   156                 foreach ($page["langlinks"] as $ar) {
   160         // We create the dbpedia uri.
   164         // We create the dbpedia uri.
   161         $dbpedia_uri = null;
   165         $dbpedia_uri = null;
   162         if($english_label!=null && strpos($english_label, '#')===false){
   166         if($english_label!=null && strpos($english_label, '#')===false){
   163             $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label);
   167             $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label);
   164         }
   168         }
       
   169         */
       
   170         $dbpedia_uri = WikiTagUtils::getDbpediaUri($new_label);
   165         
   171         
   166         $wp_response = array(
   172         $wp_response = array(
   167             'new_label'=>$new_label,
   173             'new_label'=>$new_label,
   168         	'alternative_label'=>$alternative_label,
   174         	'alternative_label'=>$alternative_label,
   169         	'status'=>$status,
   175         	'status'=>$status,
   176         	'response'=>$original_response);
   182         	'response'=>$original_response);
   177         
   183         
   178         return $wp_response;
   184         return $wp_response;
   179     }
   185     }
   180     
   186     
       
   187     /**
       
   188      * Generic curl request
       
   189      *
       
   190      * @param string $url
       
   191      * @return object (json decoded)
       
   192      */
       
   193     private static function curlRequest($url)
       
   194     {
       
   195     	$ch = curl_init();
       
   196     	curl_setopt($ch, CURLOPT_URL, $url);
       
   197     	curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
       
   198     	// default values
       
   199     	curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0');
       
   200     	curl_setopt($ch, CURLOPT_TIMEOUT_MS, 5000);
       
   201     	// Set options if they are set in the config.yml file, typically for proxy configuration.
       
   202     	// Thanks to the configuration file, it will execute commands like "curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);" or "curl_setopt($ch, CURLOPT_PROXY, "xxx.yyy.zzz:PORT");"
       
   203     	$curl_options = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.curl_options");
       
   204     	foreach ($curl_options as $key => $value) {
       
   205     		if(strtoupper($value)=='TRUE'){
       
   206     			$value = TRUE;
       
   207     		}
       
   208     		else if (strtoupper($value)=='FALSE'){
       
   209     			$value = FALSE;
       
   210     		}
       
   211     		else if (defined($value)){
       
   212     			$value = constant($value);
       
   213     		}
       
   214     		curl_setopt($ch, constant($key), $value);
       
   215     	}
       
   216     	// end of configured cURL option handling
       
   217     	$res = curl_exec($ch);
       
   218     	$curl_errno = curl_errno($ch);
       
   219     	$curl_error = curl_error($ch);
       
   220     	curl_close($ch);
       
   221     
       
   222     	if ($curl_errno > 0) {
       
   223     		throw new \Exception("$url\n request failed. cURLError #$curl_errno: $curl_error\n", $curl_errno, null);
       
   224     	}
       
   225     	
       
   226     	return $res;
       
   227 	}
       
   228     
   181 
   229 
   182     /**
   230     /**
   183      * build and do the request to Wikipedia.
   231      * build and do the request to Wikipedia.
   184      *
   232      *
   185      * @param array $params
   233      * @param array $params
   195             else{
   243             else{
   196                 $params_str .= '&'.$key.'='.$value;
   244                 $params_str .= '&'.$key.'='.$value;
   197             }
   245             }
   198         }
   246         }
   199         
   247         
   200         //$url = WikiTagUtils::$WIKIPEDIA_API_URL.'?'.$params_str;
       
   201         //throw new \Exception($GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates"), 1, null);
       
   202         $url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["wikipedia_api"].'?'.$params_str;
   248         $url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["wikipedia_api"].'?'.$params_str;
   203         
   249         
   204         $ch = curl_init();
   250         $res = WikiTagUtils::curlRequest($url);
   205         curl_setopt($ch, CURLOPT_URL, $url);
       
   206         curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
       
   207         // default values
       
   208         curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.7; rv:10.0.1) Gecko/20100101 Firefox/10.0.1');
       
   209         curl_setopt($ch, CURLOPT_TIMEOUT_MS, 5000);
       
   210         // Set options if they are set in the config.yml file, typically for proxy configuration.
       
   211         // Thanks to the configuration file, it will execute commands like "curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);" or "curl_setopt($ch, CURLOPT_PROXY, "xxx.yyy.zzz:PORT");"
       
   212         $curl_options = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.curl_options");
       
   213         foreach ($curl_options as $key => $value) {
       
   214             if(strtoupper($value)=='TRUE'){
       
   215                 $value = TRUE;
       
   216             }
       
   217             else if (strtoupper($value)=='FALSE'){
       
   218                 $value = FALSE;
       
   219             }
       
   220             else if (defined($value)){
       
   221                 $value = constant($value);
       
   222             }
       
   223             curl_setopt($ch, constant($key), $value);
       
   224         }
       
   225         // end of treatment
       
   226         $res = curl_exec($ch);
       
   227         $curl_errno = curl_errno($ch);
       
   228         $curl_error = curl_error($ch);
       
   229         curl_close($ch);
       
   230         
       
   231         if ($curl_errno > 0) {
       
   232             throw new \Exception("Wikipedia request failed. cURLError #$curl_errno: $curl_error\n", $curl_errno, null);
       
   233         }
       
   234         
       
   235         $val = json_decode($res, true);
   251         $val = json_decode($res, true);
   236         $pages = $val["query"]["pages"];
   252         $pages = $val["query"]["pages"];
   237         return array($res, $pages);
   253         return array($res, $pages);
   238     }
   254     }
   239     
   255     
   263     }
   279     }
   264     
   280     
   265     /**
   281     /**
   266      * Builds DbPedia URI
   282      * Builds DbPedia URI
   267      */
   283      */
   268     private static function getDbpediaUri($english_label)
   284     private static function getDbpediaUri($label, $params=[])
   269     {
   285     {
   270         return sprintf($GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia"], WikiTagUtils::urlize_for_wikipedia($english_label));
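   286     	// Derive the language code from the SPARQL endpoint URL: the 2 characters at offset 7, i.e. right after "http://" (assumes a URL of the form http://xx.dbpedia.org/... - TODO confirm)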
       
   287     	$dbp_url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"];
       
   288     	$lang = substr($dbp_url, 7, 2);
       
   289     	$params = [
       
   290     		"query" => 'select distinct * where {?s rdfs:label "'.$label.'"@'.$lang.'}',
       
   291     		"format" => 'application/json',
       
   292     	];
       
   293 
       
   294     	$params_str = '';
       
   295     	foreach ($params as $key => $value) {
       
   296     		if ($params_str==''){
       
   297     			$params_str = $key.'='.urlencode($value);
       
   298     		}
       
   299     		else{
       
   300     			$params_str .= '&'.$key.'='.urlencode($value);
       
   301     		}
       
   302     	}
       
   303     	
       
   304     	$url = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates")["dbpedia_sparql"].'?'.$params_str;
       
   305     	
       
   306     	$res = WikiTagUtils::curlRequest($url);
       
   307     	$val = json_decode($res, true);
       
   308     	$uri = "";
       
   309     	if(array_key_exists("results", $val)){
       
   310     		if(array_key_exists("bindings", $val["results"])){
       
   311     			if(count($val["results"]["bindings"]) > 0){
       
   312     				$uri = $val["results"]["bindings"][0]["s"]["value"];
       
   313     			}
       
   314     		}
       
   315     	}
       
   316     	return $uri;
   271     }
   317     }
   272     
   318     
   273     /**
   319     /**
   274      * URLencode label for wikipedia
   320      * URLencode label for wikipedia
   275      */
   321      */