Utils/WikiTagUtils.php
author ymh <ymh.work@gmail.com>
Fri, 26 Sep 2014 10:04:40 +0200
changeset 136 f209fcb0df6c
parent 129 65c12455ce74
permissions -rwxr-xr-x
Added tag V00.18 for changeset ab728cb3632c
Ignore whitespace changes - Everywhere: Within whitespace: At end of lines:
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
     1
<?php
74
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     2
/*
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     3
 * This file is part of the WikiTagBundle package.
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     4
 *
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     5
 * (c) IRI <http://www.iri.centrepompidou.fr/>
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     6
 *
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     7
 * For the full copyright and license information, please view the LICENSE
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     8
 * file that was distributed with this source code.
901463f9b11c add headers for public repository release
ymh <ymh.work@gmail.com>
parents: 68
diff changeset
     9
 */
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    10
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    11
namespace IRI\Bundle\WikiTagBundle\Utils;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    12
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    13
use IRI\Bundle\WikiTagBundle\Entity\Tag;
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    14
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    15
class WikiTagUtils
112
14653baf4f6b first change for wikipedia and dbpedia lang configuration
cavaliet
parents: 77
diff changeset
    16
{   
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    17
    /**
     * Cleans a tag label.
     *
     * The label is trimmed, underscores are replaced by spaces, every run of
     * whitespace is collapsed into a single space and the first character is
     * upper-cased.
     *
     * @param string|null $tag_label the raw label
     * @return string|null the cleaned label; null or an empty string is returned unchanged
     */
    public static function normalizeTag($tag_label)
    {
        if($tag_label===null || strlen($tag_label)==0){
            return $tag_label;
        }
        $tag_label = str_replace("_", " ", trim($tag_label));//tag.strip().replace("_", " ")

        // With the /u modifier preg_replace returns null when the subject is not valid UTF-8.
        // In that case we fall back to a byte-wise collapse instead of losing the label.
        $collapsed = preg_replace('/\s+/u', ' ', $tag_label);//" ".join(tag.split())
        $is_utf8 = ($collapsed!==null);
        if(!$is_utf8){
            $collapsed = preg_replace('/\s+/', ' ', $tag_label);
        }

        // Underscores at either end of the label have just been turned into spaces,
        // so we trim a second time to get rid of them.
        $tag_label = trim($collapsed);
        if($tag_label===''){
            return $tag_label;
        }

        // ucfirst works on bytes and leaves a multibyte first letter (e.g. "é") untouched.
        // The length is given explicitly because a null length is not understood by old PHP versions.
        if($is_utf8 && function_exists('mb_substr')){
            $first = mb_substr($tag_label, 0, 1, 'UTF-8');
            $rest = mb_substr($tag_label, 1, mb_strlen($tag_label, 'UTF-8'), 'UTF-8');
            return mb_strtoupper($first, 'UTF-8').$rest;//tag[0].upper() + tag[1:]
        }
        return ucfirst($tag_label);
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    31
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
    32
    /**
     * Query wikipedia with a normalized label or a pageid.
     * The label is used when it is given, the page id otherwise.
     *
     * Returns an array with the form
     * array(
     *      'new_label'=>$new_label,
     *      'alternative_label'=>$alternative_label,
     *      'status'=>$status,
     *      'wikipedia_url'=>$url,
     *      'wikipedia_alternative_url'=>$alternative_url,
     *      'pageid'=>$pageid,
     *      'alternative_pageid'=>$alternative_pageid,
     *      'dbpedia_uri'=>$dbpedia_uri,
     *      'revision_id'=>$revision_id,
     *      'response'=> the original wikipedia json response
     *      )
     * The 'alternative_*' values are only filled when the page is a redirection:
     * they then describe the page returned when the query is replayed with redirects=true.
     * When nothing usable is found, the value built by returnNullResult is returned.
     *
     * @param string $tag_label_normalized the label to look for (may be null if $page_id is given)
     * @param int $page_id the wikipedia page id, used only when no label is given
     * @param boolean $ignore_wikipedia_error if true, an exception raised by the request is logged and a null result is returned; if false the exception is rethrown
     * @param object $logger optional logger, its error() method is called when an error is ignored
     * @return array
     * @throws \Exception when the request fails and $ignore_wikipedia_error is false
     */
    public static function getWikipediaInfo($tag_label_normalized, $page_id=null, $ignore_wikipedia_error=false, $logger = null)
    {
        $params = array('action'=>'query', 'prop'=>'info|categories|langlinks', 'inprop'=>'url', 'lllimit'=>'500', 'cllimit'=>'500', 'rvprop'=>'ids', 'format'=>'json');
        if($tag_label_normalized!=null){
            // requestWikipedia concatenates the values as they are, so the label is encoded here.
            $params['titles'] = urlencode($tag_label_normalized);
        }
        else if($page_id!=null){
            $params['pageids'] = $page_id;
        }
        else{
            return WikiTagUtils::returnNullResult(null);
        }

        $ar = WikiTagUtils::requestWikipediaOrNull($params, $ignore_wikipedia_error, $logger, "Error when querying wikipedia : ");
        if($ar===null){
            return WikiTagUtils::returnNullResult(null);
        }

        $res = $ar[0];
        $original_response = $res;
        $pages = $ar[1];
        // If there is 0 or more than 1 result, the query has failed
        if(!is_array($pages) || count($pages)!=1){
            return WikiTagUtils::returnNullResult($res);
        }
        // get first result
        $page = reset($pages);
        // Unknown entry ?
        if(array_key_exists('missing', $page) || array_key_exists('invalid', $page)){
            return WikiTagUtils::returnNullResult($res);
        }
        // The entry exists, we get the data.
        $url = array_key_exists('fullurl', $page) ? $page['fullurl'] : null;
        $pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
        $new_label = array_key_exists('title', $page) ? $page['title'] : null;
        // We test the status (redirect first because a redirect has no categories key)
        if(array_key_exists('redirect', $page)){
            $status = Tag::$TAG_URL_STATUS_DICT["redirection"];
        }
        else if(WikiTagUtils::isHomonymy($page)){
            $status = Tag::$TAG_URL_STATUS_DICT["homonyme"];
        }
        else{
            $status = Tag::$TAG_URL_STATUS_DICT["match"];
        }

        // In redirection, we have to get more data by adding redirects=true to the params
        $alternative_label = null;
        $alternative_url = null;
        $alternative_pageid = null;
        if($status==Tag::$TAG_URL_STATUS_DICT["redirection"])
        {
            $params['redirects'] = "true";
            $ar = WikiTagUtils::requestWikipediaOrNull($params, $ignore_wikipedia_error, $logger, "Error when querying wikipedia for redirection : ");
            if($ar===null){
                return WikiTagUtils::returnNullResult(null);
            }

            $res = $ar[0];
            $pages = $ar[1];
            // Same rule as above : exactly one page is expected
            if(!is_array($pages) || count($pages)!=1){
                return WikiTagUtils::returnNullResult($res);
            }
            // get first result
            $page = reset($pages);
            $alternative_label = array_key_exists('title', $page) ? $page['title'] : null;
            $alternative_url = array_key_exists('fullurl', $page) ? $page['fullurl'] : null;
            $alternative_pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null;
        }

        // For a redirection, $page is at this point the page returned by the second query.
        $revision_id = array_key_exists('lastrevid', $page) ? $page['lastrevid'] : null;

        // Get the dbpedia uri by requesting dbpedia with sparql
        $dbpedia_uri = WikiTagUtils::getDbpediaUri($new_label);

        $wp_response = array(
            'new_label'=>$new_label,
            'alternative_label'=>$alternative_label,
            'status'=>$status,
            'wikipedia_url'=>$url,
            'wikipedia_alternative_url'=>$alternative_url,
            'pageid'=>$pageid,
            'alternative_pageid'=>$alternative_pageid,
            'dbpedia_uri'=>$dbpedia_uri,
            'revision_id'=>$revision_id,
            'response'=>$original_response);

        return $wp_response;
    }

    /**
     * Calls requestWikipedia and applies the error policy of getWikipediaInfo.
     *
     * @param array $params the query parameters
     * @param boolean $ignore_wikipedia_error if false, the exception is rethrown
     * @param object $logger optional logger, may be null
     * @param string $log_prefix beginning of the message logged when an error is ignored
     * @return array|null the value returned by requestWikipedia, or null when an error has been ignored
     * @throws \Exception when the request fails and $ignore_wikipedia_error is false
     */
    private static function requestWikipediaOrNull($params, $ignore_wikipedia_error, $logger, $log_prefix)
    {
        try {
            return WikiTagUtils::requestWikipedia($params);
        }
        catch(\Exception $e) {
            if(!$ignore_wikipedia_error) {
                throw $e;
            }
            if(!is_null($logger)) {
                $logger->error($log_prefix.$e->getMessage()." with trace : ".$e->getTraceAsString());
            }
            return null;
        }
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   165
    
115
085ea4dbfeee dbpedia sparql query
cavaliet
parents: 112
diff changeset
   166
    /**
     * Generic curl request.
     *
     * The options found in the "wiki_tag.curl_options" container parameter are applied
     * on top of the default ones (user agent, 5 seconds timeout).
     *
     * @param string $url
     * @param boolean $throw_error if true, an exception is thrown when curl reports an error
     * @return string|boolean the value returned by curl_exec : the raw response body (not decoded), or false on failure when $throw_error is false
     * @throws \Exception when the request fails and $throw_error is true
     */
    private static function curlRequest($url, $throw_error=true)
    {
        $ch = curl_init();
        if($ch===false){
            // No handle : nothing can be configured nor executed.
            if($throw_error){
                throw new \Exception("$url\n request failed. Unable to initialize cURL\n");
            }
            return false;
        }
        curl_setopt($ch, CURLOPT_URL, $url);
        curl_setopt($ch, CURLOPT_RETURNTRANSFER, 1);
        // default values
        curl_setopt($ch, CURLOPT_USERAGENT, 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10.9; rv:26.0) Gecko/20100101 Firefox/26.0');
        curl_setopt($ch, CURLOPT_TIMEOUT_MS, 5000);
        // Set options if they are set in the config.yml file, typically for proxy configuration.
        // Thanks to the configuration file, it will execute commands like "curl_setopt($ch, CURLOPT_PROXYTYPE, CURLPROXY_HTTP);" or "curl_setopt($ch, CURLOPT_PROXY, "xxx.yyy.zzz:PORT");"
        $curl_options = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.curl_options");
        if(is_array($curl_options)){
            foreach ($curl_options as $key => $value) {
                // Only strings can name a boolean or a constant. Other values (numbers, booleans,
                // arrays of headers...) are given to curl as they are.
                if(is_string($value)){
                    $upper_value = strtoupper($value);
                    if($upper_value==='TRUE'){
                        $value = true;
                    }
                    else if($upper_value==='FALSE'){
                        $value = false;
                    }
                    else if(defined($value)){
                        $value = constant($value);
                    }
                }
                // The key is the name of a CURLOPT_* constant.
                curl_setopt($ch, constant($key), $value);
            }
        }
        // end of treatment
        $res = curl_exec($ch);
        // The error has to be read before the handle is closed.
        $curl_errno = curl_errno($ch);
        $curl_error = curl_error($ch);
        curl_close($ch);

        if ($curl_errno > 0 && $throw_error) {
            throw new \Exception("$url\n request failed. cURLError #$curl_errno: $curl_error\n", $curl_errno, null);
        }

        return $res;
    }
085ea4dbfeee dbpedia sparql query
cavaliet
parents: 112
diff changeset
   207
    
2
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   208
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   209
    /**
     * Build and do the request to Wikipedia.
     *
     * The url is the "wikipedia_api" entry of the "wiki_tag.url_templates" container
     * parameter followed by the query string. Keys and values are concatenated as they
     * are : the caller has to url-encode the values that need it.
     *
     * @param array $params the query parameters (name => value)
     * @return array array(raw response, pages) where pages is the decoded "query" / "pages" entry
     *               of the response, or an empty array when the response does not contain it
     * @throws \Exception when the curl request fails
     */
    private static function requestWikipedia($params)
    {
        $pairs = array();
        foreach ($params as $key => $value) {
            $pairs[] = $key.'='.$value;
        }
        $params_str = implode('&', $pairs);

        $url_templates = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates");
        $url = $url_templates["wikipedia_api"].'?'.$params_str;

        $res = WikiTagUtils::curlRequest($url);
        $val = json_decode($res, true);
        // json_decode returns null when the body is not valid json, and a response may come
        // without any "query" / "pages" entry. The caller counts the pages, so it always
        // gets an array.
        $pages = array();
        if(is_array($val) && isset($val["query"]["pages"]) && is_array($val["query"]["pages"])){
            $pages = $val["query"]["pages"];
        }
        return array($res, $pages);
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   235
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   236
    /**
     * Returns tag with a null result, usually used after a failed request on Wikipedia.
     *
     * The array has the same keys as the one returned by getWikipediaInfo for a found page :
     * every value is null, except the status ('null_result' entry of Tag::$TAG_URL_STATUS_DICT)
     * and the response.
     *
     * @param string|null $response the raw wikipedia response, or null if there is none
     * @return array
     */
    private static function returnNullResult($response)
    {
        return array(
            'new_label'=>null,
            'alternative_label'=>null,
            'status'=>Tag::$TAG_URL_STATUS_DICT['null_result'],
            'wikipedia_url'=>null,
            'wikipedia_alternative_url'=>null,
            'pageid'=>null,
            'alternative_pageid'=>null,
            'dbpedia_uri'=>null,
            'revision_id'=>null,
            'response'=>$response);
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   243
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   244
    /**
     * Tells whether a Wikipedia page is a disambiguation (homonymy) page.
     *
     * The page is reported as a disambiguation page as soon as the title of
     * one of its categories contains 'Catégorie:Homonymie' or
     * 'Category:Disambiguation'.
     *
     * @param array $page page description; its "categories" entry, when
     *                    present, is a list of arrays holding a "title" key
     * @return boolean true if a matching category is found, false otherwise
     *                 (including when the page carries no usable categories)
     */
    private static function isHomonymy($page)
    {
        // A page without a "categories" list cannot be identified as a
        // disambiguation page; bail out instead of iterating over null.
        if (!is_array($page) || !isset($page["categories"]) || !is_array($page["categories"])) {
            return false;
        }

        foreach ($page["categories"] as $category) {
            // Skip malformed entries rather than failing on a missing title.
            if (!is_array($category) || !isset($category["title"]) || !is_string($category["title"])) {
                continue;
            }
            $title = $category["title"];
            // Strict test: strpos returns 0 for a match at the very beginning,
            // which a loose comparison would confuse with false.
            if (strpos($title, 'Catégorie:Homonymie') !== false || strpos($title, 'Category:Disambiguation') !== false) {
                return true;
            }
        }

        return false;
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   260
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   261
    /**
     * Looks up the DBpedia URI of a resource through the DBpedia SPARQL endpoint.
     *
     * The endpoint is the "dbpedia_sparql" entry of the "wiki_tag.url_templates"
     * container parameter. Depending on $req_param, $label is used as:
     *  - "label" (default): an rdfs:label, matched in the language read from
     *    the endpoint host (e.g. "fr" for http://fr.dbpedia.org/...), "en"
     *    when the host does not start with a two-letter language code;
     *  - "pageid": the object of dbpedia-owl:wikiPageID;
     *  - "wikiurl": the IRI that is the object of foaf:isPrimaryTopicOf.
     *
     * @param string|int $label the label, page id or wikipedia url to look for
     * @param array $params not used: the request parameters are always built
     *                      here; kept so that existing callers keep working
     * @param boolean $throw_error forwarded to WikiTagUtils::curlRequest
     * @param string $req_param how $label is interpreted: "label", "pageid" or "wikiurl"
     * @return string the DBpedia URI, or "" when the response holds no result
     */
    public static function getDbpediaUri($label, $params=array(), $throw_error=true, $req_param="label")
    {
        $url_templates = $GLOBALS["kernel"]->getContainer()->getParameter("wiki_tag.url_templates");
        $dbp_url = $url_templates["dbpedia_sparql"];

        if ($req_param === "pageid") {
            // NOTE(review): the page id is inserted verbatim in the query; callers must pass a trusted integer.
            $query = 'select distinct * where { ?s dbpedia-owl:wikiPageID '.$label.' }';
        }
        elseif ($req_param === "wikiurl") {
            // NOTE(review): the url is inserted verbatim between <>; callers must pass a trusted, already encoded url.
            $query = 'select distinct * where { ?s foaf:isPrimaryTopicOf <'.$label.'> }';
        }
        else {
            // Get lang from the endpoint host, whatever the scheme (http or https).
            $lang = 'en';
            if (preg_match('#^https?://([a-z]{2})\.#i', $dbp_url, $matches)) {
                $lang = strtolower($matches[1]);
            }
            // Escape the characters that would end or corrupt the SPARQL string literal.
            $escaped_label = strtr((string) $label, array(
                '\\' => '\\\\',
                '"' => '\\"',
                "\n" => '\\n',
                "\r" => '\\r',
            ));
            // filter with regexp to avoid results with "category:LABEL" or other "abc:LABEL"
            $query = 'select distinct * where { ?s rdfs:label "'.$escaped_label.'"@'.$lang.' . FILTER (regex(?s, "^http\\\\://[^:]+$")) }';
        }

        $request_params = array(
            "query" => $query,
            "format" => 'application/json',
        );
        // The separator is forced to "&" so that the arg_separator.output ini setting cannot alter the url.
        $url = $dbp_url.'?'.http_build_query($request_params, '', '&');

        $res = WikiTagUtils::curlRequest($url, $throw_error);
        $val = json_decode($res, true);
        if (!is_array($val) || !isset($val["results"]) || !is_array($val["results"])
            || !isset($val["results"]["bindings"]) || !is_array($val["results"]["bindings"])) {
            return "";
        }

        // Keep only the bindings that really carry a value for ?s.
        $candidates = array();
        foreach ($val["results"]["bindings"] as $binding) {
            if (is_array($binding) && isset($binding["s"]) && is_array($binding["s"])
                && isset($binding["s"]["value"]) && is_string($binding["s"]["value"])) {
                $candidates[] = $binding["s"]["value"];
            }
        }
        if (count($candidates) === 0) {
            return "";
        }

        $uri = $candidates[0];
        if (count($candidates) > 1) {
            // If there are several results, we test the "url label" to see if it matches the label.
            // Why ? Because, for example "1000" gets "Category:1000" and "1000" as result.
            // We keep this code to be safe but the regexp in the sparql request normally avoids this problem.
            $expected_label = str_replace(" ", "_", (string) $label);
            foreach ($candidates as $candidate) {
                $slash_pos = strrpos($candidate, '/');
                $url_label = ($slash_pos === false) ? $candidate : substr($candidate, $slash_pos + 1);
                // Strict comparison: with "==" numeric strings such as "1000" and "1e3" are equal.
                // When several candidates match, the last one is kept.
                if ($expected_label === $url_label) {
                    $uri = $candidate;
                }
            }
        }

        return $uri;
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   324
    
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   325
    /**
     * Converts a label to the form used in Wikipedia urls: spaces are replaced
     * by underscores, then the result is url-encoded.
     *
     * @param string $label the label to convert
     * @return string the url-encoded label
     */
    private static function urlize_for_wikipedia($label){
        $underscored = str_replace(" ", "_", $label);

        return urlencode($underscored);
    }
13f43f53d0ba first implementation
ymh <ymh.work@gmail.com>
parents:
diff changeset
   331
}