Utils/WikiTagUtils.php
changeset 44 c114504de4a8
parent 43 54f204bceb28
child 50 e967654e90cb
--- a/Utils/WikiTagUtils.php	Thu Nov 24 13:05:33 2011 +0100
+++ b/Utils/WikiTagUtils.php	Sun Nov 27 23:35:54 2011 +0100
@@ -13,106 +13,6 @@
     
     
     /**
-     * Get or create tag. Returns an array(tag:WikiTagTag, revision_id=int, created:Boolean)
-     */
-    
-    /**
-     *
-     * Enter description here ...
-     * @param unknown_type $tag_label
-     * @param unknown_type $doctrine
-     * @return multitype:boolean Ambigous <NULL, \IRI\Bundle\WikiTagBundle\Entity\Tag> Ambigous <NULL, unknown, mixed, string> (array(\IRI\Bundle\WikiTagBundle\Model\TagInterface, revision_id=int, created:Boolean))
-     */
-    public static function getOrCreateTag($tag_label, $doctrine)
-    {
-        $tag_label_normalized = WikiTagUtils::normalizeTag($tag_label);
-        // We get the wikipedia references for the tag_label
-        // We get or create the tag object
-        $tags = $doctrine->getRepository('WikiTagBundle:Tag')->findBy(array('normalizedLabel' => $tag_label_normalized));
-        $tag = null;
-        foreach ($tags as $t){
-            if($tag==null || $t->getUrlStatus()!=Tag::$TAG_URL_STATUS_DICT['null_result']){
-                $tag = $t;
-                if($t->getUrlStatus()!=Tag::$TAG_URL_STATUS_DICT['null_result']){
-                    break;
-                }
-            }
-        }
-        $wp_request_done = false;
-        if($tag==null){
-            $tag = new Tag();
-            $tag->setLabel($tag_label_normalized);
-            $tag->setOriginalLabel($tag_label);
-            $tag->setNormalizedLabel($tag_label_normalized);
-            $created = true;
-        }
-        else{
-            $created = false;
-            $match_exists = false;
-            // Even if a tag with the normalised label exists, IF this tag is not wikipedia semantised,
-            // we search if a wikipedia semantised version exists in the base
-            foreach ($tags as $t){
-                if($t->getUrlStatus()==Tag::$TAG_URL_STATUS_DICT['match']){
-                    $tag = $t;
-                    $match_exists = true;
-                    break;
-                }
-            }
-            if($match_exists==false){
-                $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
-                $status = $wp_response['status'];
-                if($status==Tag::$TAG_URL_STATUS_DICT['match']){
-                    $tag = new Tag();
-                    $tag->setLabel($tag_label_normalized);
-                    $tag->setOriginalLabel($tag_label);
-                    $tag->setNormalizedLabel($tag_label_normalized);
-                    $created = true;
-                    $wp_request_done = true;
-                }
-            }
-        }
-        
-        // We request Wikipedia if the tag is created
-        if($created==true){
-            if($wp_request_done==false){
-                $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
-            }
-            $new_label = $wp_response['new_label'];
-            $status = $wp_response['status'];
-            $url = $wp_response['wikipedia_url'];
-            $pageid = $wp_response['pageid'];
-            $dbpedia_uri = $wp_response["dbpedia_uri"];
-            $wikipedia_revision_id = $wp_response['revision_id'];
-            
-            # We save the datas
-            if($new_label!=null){
-                $tag->setLabel($new_label);
-            }
-            if($status!=null){
-                $tag->setUrlStatus($status);
-            }
-            $tag->setWikipediaUrl($url);
-            $tag->setWikipediaPageId($pageid);
-            $tag->setDbpediaUri($dbpedia_uri);
-            
-            // Save datas.
-            $em = $doctrine->getEntityManager();
-            $em->persist($tag);
-            $em->flush();
-            
-        }
-        else if($tag!=null && $tag->getWikipediaPageId()!=null){
-            $wp_response = WikiTagUtils::getWikipediaInfo(null, $tag->getWikipediaPageId());
-            $wikipedia_revision_id = $wp_response['revision_id'];
-        }
-        else{
-            $wikipedia_revision_id = null;
-        }
-        
-        return array($tag, $wikipedia_revision_id, $created);//, $wpReponse);
-    }
-    
-    /**
      * Cleans the tag label
      */
     public static function normalizeTag($tag_label)
@@ -122,19 +22,30 @@
         }
         $tag_label = trim($tag_label);//tag.strip()
         $tag_label = str_replace("_", " ", $tag_label);//tag.replace("_", " ")
-        $tag_label = str_replace("Œ", "oe", $tag_label);
-        $tag_label = str_replace("œ", "oe", $tag_label);
-        $tag_label = preg_replace('/\s+/', ' ', $tag_label);//" ".join(tag.split())
+        $tag_label = preg_replace('/\s+/u', ' ', $tag_label);//" ".join(tag.split())
         $tag_label = ucfirst($tag_label);//tag[0].upper() + tag[1:]
         return $tag_label;
     }
     
     /**
+     * Queries Wikipedia with either a normalized label or a page id.
+     * Returns an array of the form:
+     * array(
+     *      'new_label'=>$new_label,
+     *   	'alternative_label'=>$alternative_label,
+     *   	'status'=>$status,
+     *   	'wikipedia_url'=>$url,
+     *      'wikipedia_alternative_url'=>$alternative_url,
+     *   	'pageid'=>$pageid,
+     *   	'alternative_pageid'=>$alternative_pageid,
+     *   	'dbpedia_uri'=>$dbpedia_uri,
+     *   	'revision_id'=>$revision_id,
+     *   	'response'=>$original_response (the original Wikipedia JSON response)
+     *   	)
      *
-     * TODO: Enter description here ...
-     * @param unknown_type $tag_label_normalized
-     * @param unknown_type $page_id
-     * @return multitype:NULL unknown |multitype:Ambigous <NULL, unknown> multitype:number  mixed Ambigous <NULL, string> Ambigous <unknown, mixed>
+     * @param string|null $tag_label_normalized
+     * @param int|null $page_id
+     * @return array
      */
     public static function getWikipediaInfo($tag_label_normalized, $page_id=null)
     {
@@ -181,7 +92,11 @@
             $status = Tag::$TAG_URL_STATUS_DICT["match"];
         }
         // In redirection, we have to get more datas by adding redirects=true to the params
-        if($status==Tag::$TAG_URL_STATUS_DICT["redirection"]){
+        $alternative_label = null;
+        $alternative_url = null;
+        $alternative_pageid = null;
+        if($status==Tag::$TAG_URL_STATUS_DICT["redirection"])
+        {
             $params['redirects'] = "true";
             $ar = WikiTagUtils::requestWikipedia($params);
             $res = $ar[0];
@@ -192,6 +107,9 @@
             }
             // get first result
             $page = reset($pages);
+            $alternative_label = array_key_exists('title', $page) ? $page['title'] : null;
+            $alternative_url = array_key_exists('fullurl', $page) ? $page['fullurl'] : null;
+            $alternative_pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
         }
         
         $revision_id = $page['lastrevid'];
@@ -214,17 +132,27 @@
             $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label);
         }
         
-        $wp_response = array('new_label'=>$new_label, 'status'=>$status, 'wikipedia_url'=>$url, 'pageid'=>$pageid, 'dbpedia_uri'=>$dbpedia_uri, 'revision_id'=>$revision_id, 'response'=>$original_response);
+        $wp_response = array(
+            'new_label'=>$new_label,
+        	'alternative_label'=>$alternative_label,
+        	'status'=>$status,
+        	'wikipedia_url'=>$url,
+            'wikipedia_alternative_url'=>$alternative_url,
+        	'pageid'=>$pageid,
+        	'alternative_pageid'=>$alternative_pageid,
+        	'dbpedia_uri'=>$dbpedia_uri,
+        	'revision_id'=>$revision_id,
+        	'response'=>$original_response);
         //return $url." <br/>RES =  ".$res/*." <br/>DUMP =  ".var_dump($pages)*/." <br/>COUNT =  ".count($pages)." <br/>page =  ".var_dump($page);
         return $wp_response;
     }
     
 
     /**
+     * Builds and sends the request to Wikipedia.
      *
-     * TODO : Enter description here ...
-     * @param unknown_type $params
-     * @return multitype:unknown mixed
+     * @param array $params
+     * @return array
      */
     private static function requestWikipedia($params)
     {