Utils/WikiTagUtils.php
changeset 42 0e57c730bb18
parent 32 38dcd2db04e4
child 43 54f204bceb28
equal deleted inserted replaced
39:b403086580f7 42:0e57c730bb18
     9     // Constants
     9     // Constants
    10     private static $WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php";
    10     private static $WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php";
    11     private static $WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s";
    11     private static $WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s";
    12     private static $DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s";
    12     private static $DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s";
    13     
    13     
    14     
       
    /**
     * Gets the tag matching a label, creating and persisting a new one when needed.
     *
     * The label is normalized and the tags sharing that normalized label are
     * loaded from the 'WikiTagBundle:Tag' repository. Selection rules:
     *  - no tag found: a new tag is created and filled from Wikipedia;
     *  - a tag with the "match" status exists: it is returned as is;
     *  - otherwise Wikipedia is queried with the normalized label and, if the
     *    answer has the "match" status, a new tag is created from that answer;
     *    if not, the best existing candidate is returned.
     *
     * A created tag is persisted and flushed through the entity manager.
     * For a reused tag that has a Wikipedia page id, the current revision id
     * is fetched with a second Wikipedia request made by page id.
     *
     * @param string $tag_label the label as provided by the caller
     * @param object $doctrine  object providing getRepository() and getEntityManager()
     * @return array array($tag, $wikipedia_revision_id, $created) where $tag is the
     *               tag object, $wikipedia_revision_id the 'revision_id' returned by
     *               getWikipediaInfo (null when no request applies) and $created a boolean
     */
    public static function getOrCreateTag($tag_label, $doctrine)
    {
        $tag_label_normalized = WikiTagUtils::normalizeTag($tag_label);
        $null_result_status = Tag::$TAG_URL_STATUS_DICT['null_result'];
        $match_status = Tag::$TAG_URL_STATUS_DICT['match'];

        $tags = $doctrine->getRepository('WikiTagBundle:Tag')->findBy(array('normalizedLabel' => $tag_label_normalized));

        // Candidate selection: the first tag whose status is not "null_result",
        // or failing that the first tag of the list.
        // Status codes are compared loosely on purpose: the stored type is not
        // visible from here (it may be a numeric string or null).
        $tag = null;
        foreach ($tags as $t) {
            if ($t->getUrlStatus() != $null_result_status) {
                $tag = $t;
                break;
            }
            if ($tag === null) {
                $tag = $t;
            }
        }

        // Wikipedia answer kept for the creation step; null means "not requested yet".
        $wp_response = null;
        $created = false;

        if ($tag === null) {
            $created = true;
        } else {
            // Even if a tag with the normalized label exists, a Wikipedia
            // semantised ("match") version is preferred when there is one.
            $match_exists = false;
            foreach ($tags as $t) {
                if ($t->getUrlStatus() == $match_status) {
                    $tag = $t;
                    $match_exists = true;
                    break;
                }
            }
            if (!$match_exists) {
                // No semantised tag in base: ask Wikipedia, and only create a
                // new tag when Wikipedia knows an exact match for the label.
                $label_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
                if ($label_response['status'] == $match_status) {
                    $wp_response = $label_response;
                    $created = true;
                }
            }
        }

        if ($created) {
            $tag = new Tag();
            $tag->setLabel($tag_label_normalized);
            $tag->setOriginalLabel($tag_label);
            $tag->setNormalizedLabel($tag_label_normalized);

            // Wikipedia is requested here only if it was not already done above.
            if ($wp_response === null) {
                $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
            }
            $new_label = $wp_response['new_label'];
            $status = $wp_response['status'];
            $wikipedia_revision_id = $wp_response['revision_id'];

            // The normalized label is kept when Wikipedia gives no (or an empty) label.
            if ($new_label !== null && $new_label !== '') {
                $tag->setLabel($new_label);
            }
            // Strict check: a status code that loosely equals null (for
            // example 0) is a real answer and must still be stored.
            if ($status !== null) {
                $tag->setUrlStatus($status);
            }
            $tag->setWikipediaUrl($wp_response['wikipedia_url']);
            $tag->setWikipediaPageId($wp_response['pageid']);
            $tag->setDbpediaUri($wp_response['dbpedia_uri']);

            // Save the new tag.
            $em = $doctrine->getEntityManager();
            $em->persist($tag);
            $em->flush();
        } elseif ($tag->getWikipediaPageId() != null) {
            // Existing tag ($tag cannot be null when nothing was created):
            // only the current revision id is needed, requested by page id.
            $wp_response = WikiTagUtils::getWikipediaInfo(null, $tag->getWikipediaPageId());
            $wikipedia_revision_id = $wp_response['revision_id'];
        } else {
            $wikipedia_revision_id = null;
        }

        return array($tag, $wikipedia_revision_id, $created);
    }
       
   114     
    14     
   115     /**
    15     /**
   116      * Cleans the tag label
    16      * Cleans the tag label
   117      */
    17      */
   118     public static function normalizeTag($tag_label)
    18     public static function normalizeTag($tag_label)
   179         else{
    79         else{
   180             //return " MATCH";
    80             //return " MATCH";
   181             $status = Tag::$TAG_URL_STATUS_DICT["match"];
    81             $status = Tag::$TAG_URL_STATUS_DICT["match"];
   182         }
    82         }
   183         // In redirection, we have to get more datas by adding redirects=true to the params
    83         // In redirection, we have to get more datas by adding redirects=true to the params
   184         if($status==Tag::$TAG_URL_STATUS_DICT["redirection"]){
    84         $alternative_label = null;
       
    85         $alternative_url = null;
       
    86         $alternative_pageid = null;
       
    87         if($status==Tag::$TAG_URL_STATUS_DICT["redirection"])
       
    88         {
       
    89             //TODO: add alternative label
   185             $params['redirects'] = "true";
    90             $params['redirects'] = "true";
   186             $ar = WikiTagUtils::requestWikipedia($params);
    91             $ar = WikiTagUtils::requestWikipedia($params);
   187             $res = $ar[0];
    92             $res = $ar[0];
   188             $pages = $ar[1];
    93             $pages = $ar[1];
   189             #we know that we have at least one answer
    94             #we know that we have at least one answer
   190             if(count($pages)>1 || count($pages)==0){
    95             if(count($pages)>1 || count($pages)==0){
   191                 return WikiTagUtils::returnNullResult($res);
    96                 return WikiTagUtils::returnNullResult($res);
   192             }
    97             }
   193             // get first result
    98             // get first result
   194             $page = reset($pages);
    99             $page = reset($pages);
       
   100             $alternative_label = array_key_exists('title', $page) ? $page['title'] : null;
       
   101             $alternative_url = array_key_exists('fullurl', $page) ? $page['fullurl'] : null;
       
   102             $alternative_pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
   195         }
   103         }
   196         
   104         
   197         $revision_id = $page['lastrevid'];
   105         $revision_id = $page['lastrevid'];
   198         
   106         
   199         // process language to extract the english label
   107         // process language to extract the english label
   212         $dbpedia_uri = null;
   120         $dbpedia_uri = null;
   213         if($english_label!=null && strpos($english_label, '#')===false){
   121         if($english_label!=null && strpos($english_label, '#')===false){
   214             $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label);
   122             $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label);
   215         }
   123         }
   216         
   124         
   217         $wp_response = array('new_label'=>$new_label, 'status'=>$status, 'wikipedia_url'=>$url, 'pageid'=>$pageid, 'dbpedia_uri'=>$dbpedia_uri, 'revision_id'=>$revision_id, 'response'=>$original_response);
   125         $wp_response = array(
       
   126             'new_label'=>$new_label,
       
   127         	'alternative_label'=>$alternative_label,
       
   128         	'status'=>$status,
       
   129         	'wikipedia_url'=>$url,
       
   130             'wikipedia_alternative_url'=>$alternative_url,
       
   131         	'pageid'=>$pageid,
       
   132         	'alternative_pageid'=>$alternative_pageid,
       
   133         	'dbpedia_uri'=>$dbpedia_uri,
       
   134         	'revision_id'=>$revision_id,
       
   135         	'response'=>$original_response);
   218         //return $url." <br/>RES =  ".$res/*." <br/>DUMP =  ".var_dump($pages)*/." <br/>COUNT =  ".count($pages)." <br/>page =  ".var_dump($page);
   136         //return $url." <br/>RES =  ".$res/*." <br/>DUMP =  ".var_dump($pages)*/." <br/>COUNT =  ".count($pages)." <br/>page =  ".var_dump($page);
   219         return $wp_response;
   137         return $wp_response;
   220     }
   138     }
   221     
   139     
   222 
   140