Utils/WikiTagUtils.php
author cavaliet
Mon, 24 Oct 2011 19:08:04 +0200
changeset 12 81cc9274c20a
parent 9 cc32af725176
child 22 99c15cfe420b
permissions -rwxr-xr-x
Update Tag alias.
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     3
namespace IRI\Bundle\WikiTagBundle\Utils;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     4
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     5
use IRI\Bundle\WikiTagBundle\Entity\Tag;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     6
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     7
class WikiTagUtils
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     8
{
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     9
    // Constants
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
    private static $WIKIPEDIA_API_URL = "http://fr.wikipedia.org/w/api.php";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
    private static $WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
    private static $DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    16
     * Get or create tag. Returns an array(tag:WikiTagTag, revision_id=int, created:Boolean)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    18
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    19
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    20
     *
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    21
     * Enter description here ...
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    22
     * @param unknown_type $tag_label
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    23
     * @param unknown_type $doctrine
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    24
     * @return multitype:boolean Ambigous <NULL, \IRI\Bundle\WikiTagBundle\Entity\Tag> Ambigous <NULL, unknown, mixed, string> (array(\IRI\Bundle\WikiTagBundle\Model\TagInterface, revision_id=int, created:Boolean))
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    25
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    26
    public static function getOrCreateTag($tag_label, $doctrine)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    27
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    28
        $tag_label_normalized = WikiTagUtils::normalizeTag($tag_label);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    29
        // We get the wikipedia references for the tag_label
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    30
        // We get or create the tag object
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
        $tags = $doctrine->getRepository('WikiTagBundle:Tag')->findBy(array('normalizedLabel' => $tag_label_normalized));
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
        $tag = null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    33
        foreach ($tags as $t){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    34
            if($tag==null || $t->getUrlStatus()!=Tag::$TAG_URL_STATUS_DICT['null_result']){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    35
                $tag = $t;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    36
                if($t->getUrlStatus()!=Tag::$TAG_URL_STATUS_DICT['null_result']){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    37
                    break;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    38
                }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    39
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    40
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    41
        if($tag==null){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    42
            $tag = new Tag();
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    43
            $tag->setLabel($tag_label_normalized);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    44
            $tag->setOriginalLabel($tag_label);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    45
            $tag->setNormalizedLabel($tag_label_normalized);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    46
            $created = true;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    47
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    48
        else{
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    49
            $created = false;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    50
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    51
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    52
        // We request Wikipedia if the tag is created
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    53
        if($created==true){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    54
            $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    55
            $new_label = $wp_response['new_label'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    56
            $status = $wp_response['status'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    57
            $url = $wp_response['wikipedia_url'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    58
            $pageid = $wp_response['pageid'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    59
            $dbpedia_uri = $wp_response["dbpedia_uri"];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    60
            $wikipedia_revision_id = $wp_response['revision_id'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    61
            
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    62
            # We save the datas
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    63
            if($new_label!=null){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    64
                $tag->setLabel($new_label);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    65
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    66
            if($status!=null){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    67
                $tag->setUrlStatus($status);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    68
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    69
            $tag->setWikipediaUrl($url);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    70
            $tag->setWikipediaPageId($pageid);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    71
            $tag->setDbpediaUri($dbpedia_uri);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    72
            
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    73
            // Save datas.
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    74
            $em = $doctrine->getEntityManager();
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    75
            $em->persist($tag);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    76
            $em->flush();
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    77
            
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    78
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    79
        else if($tag!=null && $tag->getWikipediaPageId()!=null){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    80
            $wp_response = WikiTagUtils::getWikipediaInfo(null, $tag->getWikipediaPageId());
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    81
            $wikipedia_revision_id = $wp_response['revision_id'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    82
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    83
        else{
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    84
            $wikipedia_revision_id = null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    85
        }
8
7d2fb5d7c9ff debug addtag for document
cavaliet
parents: 2
diff changeset
    86
        //return "coucou4";
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    87
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    88
        return array($tag, $wikipedia_revision_id, $created);//, $wpReponse);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    89
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    90
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    91
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    92
     * Cleans the tag label
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    93
     */
8
7d2fb5d7c9ff debug addtag for document
cavaliet
parents: 2
diff changeset
    94
    public static function normalizeTag($tag_label)
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    95
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    96
        if(strlen($tag_label)==0){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    97
            return $tag_label;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    98
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    99
        $tag_label = trim($tag_label);//tag.strip()
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   100
        $tag_label = str_replace("_", " ", $tag_label);//tag.replace("_", " ")
9
cc32af725176 first step for tag list and add Pagerfanta for paginator
cavaliet
parents: 8
diff changeset
   101
        $tag_label = str_replace("Œ", "oe", $tag_label);
cc32af725176 first step for tag list and add Pagerfanta for paginator
cavaliet
parents: 8
diff changeset
   102
        $tag_label = str_replace("œ", "oe", $tag_label);
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   103
        $tag_label = preg_replace('/\s+/', ' ', $tag_label);//" ".join(tag.split())
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   104
        $tag_label = ucfirst($tag_label);//tag[0].upper() + tag[1:]
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   105
        return $tag_label;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   106
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   107
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   108
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   109
     *
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   110
     * TODO: Enter description here ...
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   111
     * @param unknown_type $tag_label_normalized
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   112
     * @param unknown_type $page_id
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   113
     * @return multitype:NULL unknown |multitype:Ambigous <NULL, unknown> multitype:number  mixed Ambigous <NULL, string> Ambigous <unknown, mixed>
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   114
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   115
    private static function getWikipediaInfo($tag_label_normalized, $page_id=null)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   116
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   117
        $params = array('action'=>'query', 'prop'=>'info|categories|langlinks', 'inprop'=>'url', 'lllimit'=>'500', 'cllimit'=>'500', 'rvprop'=>'ids', 'format'=>'json');
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   118
        if($tag_label_normalized!=null){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   119
            $params['titles'] = urlencode($tag_label_normalized);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   120
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   121
        else if($page_id!=null){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   122
            $params['pageids'] = $page_id;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   123
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   124
        else{
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   125
            return WikiTagUtils::returnNullResult(null);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   126
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   127
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   128
        $ar = WikiTagUtils::requestWikipedia($params);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   129
        $res = $ar[0];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   130
        $original_response = $res;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   131
        $pages = $ar[1];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   132
        // If there 0 or more than 1 result, the query has failed
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   133
        if(count($pages)>1 || count($pages)==0){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   134
            return WikiTagUtils::returnNullResult($res);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   135
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   136
        // get first result
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   137
        $page = reset($pages);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   138
        // Unknow entry ?
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   139
        if(array_key_exists('missing', $page) || array_key_exists('invalid', $page)){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   140
            return WikiTagUtils::returnNullResult($res);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   141
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   142
        // The entry exists, we get the datas.
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   143
        $url = array_key_exists('fullurl', $page) ? $page['fullurl'] : null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   144
        $pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   145
        $new_label = array_key_exists('title', $page) ? $page['title'] : null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   146
        // We test the status (redirect first because a redirect has no categories key)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   147
        if(array_key_exists('redirect', $page)){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   148
            //return " REDIRECT";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   149
            $status = Tag::$TAG_URL_STATUS_DICT["redirection"];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   150
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   151
        else if(WikiTagUtils::isHomonymy($page)){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   152
            //return " HOMONYMY";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   153
            $status = Tag::$TAG_URL_STATUS_DICT["homonyme"];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   154
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   155
        else{
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   156
            //return " MATCH";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   157
            $status = Tag::$TAG_URL_STATUS_DICT["match"];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   158
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   159
        // In redirection, we have to get more datas by adding redirects=true to the params
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   160
        if($status==Tag::$TAG_URL_STATUS_DICT["redirection"]){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   161
            $params['redirects'] = "true";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   162
            $ar = WikiTagUtils::requestWikipedia($params);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   163
            $res = $ar[0];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   164
            $pages = $ar[1];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
            #we know that we have at least one answer
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   166
            if(count($pages)>1 || count($pages)==0){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   167
                return WikiTagUtils::returnNullResult($res);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   168
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   169
            // get first result
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   170
            $page = reset($pages);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   171
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   172
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   173
        $revision_id = $page['lastrevid'];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   174
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   175
        // process language to extract the english label
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   176
        $english_label = null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   177
        if($status==Tag::$TAG_URL_STATUS_DICT["match"] || $status==Tag::$TAG_URL_STATUS_DICT["redirection"]){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   178
            if(array_key_exists("langlinks", $page)){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   179
                foreach ($page["langlinks"] as $ar) {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   180
                    if($ar["lang"]=="en"){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   181
                        $english_label = $ar["*"];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   182
                        break;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   183
                    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   184
                }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   185
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   186
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   187
        // We create the dbpedia uri.
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   188
        $dbpedia_uri = null;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   189
        if($english_label!=null && strpos($english_label, '#')===false){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   190
            $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   191
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   192
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   193
        $wp_response = array('new_label'=>$new_label, 'status'=>$status, 'wikipedia_url'=>$url, 'pageid'=>$pageid, 'dbpedia_uri'=>$dbpedia_uri, 'revision_id'=>$revision_id, 'response'=>$original_response);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   194
        //return $url." <br/>RES =  ".$res/*." <br/>DUMP =  ".var_dump($pages)*/." <br/>COUNT =  ".count($pages)." <br/>page =  ".var_dump($page);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   195
        return $wp_response;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   196
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   197
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   198
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   199
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   200
     *
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   201
     * TODO : Enter description here ...
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   202
     * @param unknown_type $params
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   203
     * @return multitype:unknown mixed
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   204
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   205
    private static function requestWikipedia($params)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   206
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   207
        $params_str = '';
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   208
        foreach ($params as $key => $value) {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   209
            if ($params_str==''){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   210
                $params_str = $key.'='.$value;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   211
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   212
            else{
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   213
                $params_str .= '&'.$key.'='.$value;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   214
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   215
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   216
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   217
        $url = WikiTagUtils::$WIKIPEDIA_API_URL.'?'.$params_str;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   218
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   219
        $ch = curl_init();
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   220
        curl_setopt($ch, CURLOPT_URL, $url);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   221
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   222
        curl_setopt($ch, CURLOPT_USERAGENT, 'http://www.iri.centrepompidou.fr');
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   223
        $res = curl_exec($ch);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   224
        curl_close($ch);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   225
        
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   226
        $val = json_decode($res, true);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   227
        $pages = $val["query"]["pages"];
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   228
        return array($res, $pages);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   229
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   230
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   231
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   232
     * Returns tag with a null result, usually used after a failed request on Wikipedia
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   233
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   234
    private static function returnNullResult($response)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   235
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   236
        return array('new_label'=>null, 'status'=>Tag::$TAG_URL_STATUS_DICT['null_result'], 'wikipedia_url'=>null, 'pageid'=>null, 'dbpedia_uri'=>null, 'revision_id'=>null, 'response'=>$response);
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   237
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   238
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   239
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   240
     * Returns tag with a null result, usually used after a failed request on Wikipedia
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   241
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   242
    private static function isHomonymy($page)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   243
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   244
        //$s = "";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   245
        foreach ($page["categories"] as $ar) {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   246
            //$s .= ", b : ".$ar." - title = ".$ar["title"].", strpos = ".strpos($ar["title"], 'Catégorie:Homonymie');
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   247
            // Strict test because false can be seen as "O".
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   248
            if(strpos($ar["title"], 'Catégorie:Homonymie')!==false || strpos($ar["title"], 'Category:Disambiguation')!==false){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   249
                //$s .= "TRUE";
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   250
                return true;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   251
            }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   252
        }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   253
        return false;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   254
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   255
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   256
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   257
     * Builds DbPedia URI
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   258
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   259
    private static function getDbpediaUri($english_label)
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   260
    {
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   261
        return sprintf(WikiTagUtils::$DBPEDIA_URI_TEMPLATE, WikiTagUtils::urlize_for_wikipedia($english_label));
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   262
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   263
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   264
    /**
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   265
     * URLencode label for wikipedia
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   266
     */
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   267
    private static function urlize_for_wikipedia($label){
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   268
        return urlencode(str_replace(" ", "_", $label));
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   269
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   270
}