11 private static $WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s"; |
11 private static $WIKIPEDIA_VERSION_PERMALINK_TEMPLATE = "http://fr.wikipedia.org/w/index.php?oldid=%s"; |
12 private static $DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s"; |
12 private static $DBPEDIA_URI_TEMPLATE = "http://dbpedia.org/resource/%s"; |
13 |
13 |
14 |
14 |
15 /** |
15 /** |
16 * Get or create tag. Returns an array(tag:WikiTagTag, revision_id=int, created:Boolean) |
|
17 */ |
|
18 |
|
/**
 * Looks up the tag matching a label and creates it when needed.
 *
 * The label is normalized, then the 'WikiTagBundle:Tag' repository is searched
 * on the normalized label. Among the tags found, one whose URL status is
 * 'match' is preferred, then the first one whose URL status is not
 * 'null_result', then the first one found.
 *
 * A new Tag is built, filled with the Wikipedia answer, persisted and flushed when:
 *  - no tag exists for the normalized label, or
 *  - tags exist but none has the 'match' status, and Wikipedia answers with
 *    the 'match' status for the normalized label.
 *
 * When an existing tag is kept and it has a Wikipedia page id, Wikipedia is
 * queried with that page id only to read the current revision id.
 *
 * @param string $tag_label the label as typed; stored as the original label of a new tag
 * @param mixed $doctrine object exposing getRepository() and getEntityManager()
 * @return array array(the tag, the Wikipedia revision id or null, true when the tag was created)
 */
public static function getOrCreateTag($tag_label, $doctrine)
{
    $tag_label_normalized = WikiTagUtils::normalizeTag($tag_label);

    // Every tag already stored for this normalized label.
    $tags = $doctrine->getRepository('WikiTagBundle:Tag')->findBy(array('normalizedLabel' => $tag_label_normalized));

    // Default choice: the first tag that is not a 'null_result', else the first tag.
    $tag = null;
    foreach ($tags as $candidate) {
        $has_result = ($candidate->getUrlStatus() != Tag::$TAG_URL_STATUS_DICT['null_result']);
        if ($tag === null || $has_result) {
            $tag = $candidate;
        }
        if ($has_result) {
            break;
        }
    }

    $created = false;
    // Wikipedia answer already fetched for the tag to create, if any.
    $wp_response = null;

    if ($tag === null) {
        $created = true;
    }
    else {
        // A stored tag exists: prefer one that is semantised on Wikipedia.
        $matching_tag = null;
        foreach ($tags as $candidate) {
            if ($candidate->getUrlStatus() == Tag::$TAG_URL_STATUS_DICT['match']) {
                $matching_tag = $candidate;
                break;
            }
        }

        if ($matching_tag !== null) {
            $tag = $matching_tag;
        }
        else {
            // No semantised tag is stored: ask Wikipedia whether the label matches a page now.
            $probe = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
            if ($probe['status'] == Tag::$TAG_URL_STATUS_DICT['match']) {
                $created = true;
                $wp_response = $probe;
            }
        }
    }

    if ($created) {
        $tag = new Tag();
        $tag->setLabel($tag_label_normalized);
        $tag->setOriginalLabel($tag_label);
        $tag->setNormalizedLabel($tag_label_normalized);

        // Wikipedia is requested here unless the answer was fetched above.
        if ($wp_response === null) {
            $wp_response = WikiTagUtils::getWikipediaInfo($tag_label_normalized);
        }
        $new_label = $wp_response['new_label'];
        $status = $wp_response['status'];
        $url = $wp_response['wikipedia_url'];
        $pageid = $wp_response['pageid'];
        $dbpedia_uri = $wp_response["dbpedia_uri"];
        $wikipedia_revision_id = $wp_response['revision_id'];

        // Label and status are only overwritten when Wikipedia gave a value.
        if ($new_label != null) {
            $tag->setLabel($new_label);
        }
        if ($status != null) {
            $tag->setUrlStatus($status);
        }
        $tag->setWikipediaUrl($url);
        $tag->setWikipediaPageId($pageid);
        $tag->setDbpediaUri($dbpedia_uri);

        // Save the new tag.
        $em = $doctrine->getEntityManager();
        $em->persist($tag);
        $em->flush();
    }
    else if ($tag != null && $tag->getWikipediaPageId() != null) {
        // Existing tag with a known page: only the revision id is needed.
        $page_info = WikiTagUtils::getWikipediaInfo(null, $tag->getWikipediaPageId());
        $wikipedia_revision_id = $page_info['revision_id'];
    }
    else {
        $wikipedia_revision_id = null;
    }

    return array($tag, $wikipedia_revision_id, $created);
}
|
114 |
|
/**
 * Cleans the tag label.
 *
 * Steps, in order:
 *  - underscores are replaced by spaces,
 *  - every run of white space is collapsed into a single space,
 *  - leading and trailing spaces are removed,
 *  - the first character is upper-cased.
 *
 * @param string|null $tag_label the raw label
 * @return string|null the cleaned label; null and the empty string are returned unchanged
 */
public static function normalizeTag($tag_label)
{
    if($tag_label===null || strlen($tag_label)==0){
        return $tag_label;
    }
    $tag_label = str_replace("_", " ", $tag_label);//tag.replace("_", " ")

    // With the "u" modifier the subject is read as UTF-8; preg_replace then
    // returns null when the subject is not valid UTF-8, so fall back to the
    // byte-wise pattern instead of losing the label.
    $collapsed = preg_replace('/\s+/u', ' ', $tag_label);//" ".join(tag.split())
    $is_utf8 = ($collapsed!==null);
    if(!$is_utf8){
        $collapsed = preg_replace('/\s+/', ' ', $tag_label);
    }

    // Trim last: the spaces produced from leading/trailing underscores or
    // white space must not survive, otherwise the first character is a space
    // and the label is never capitalised.
    $tag_label = trim($collapsed);
    if($tag_label===''){
        return $tag_label;
    }

    // ucfirst() only looks at the first byte, so it leaves a multi-byte
    // initial (e.g. "é") untouched; use mbstring when it is loaded.
    if($is_utf8 && function_exists('mb_strtoupper')){
        $first = mb_substr($tag_label, 0, 1, 'UTF-8');
        $rest = mb_substr($tag_label, 1, mb_strlen($tag_label, 'UTF-8'), 'UTF-8');
        return mb_strtoupper($first, 'UTF-8').$rest;//tag[0].upper() + tag[1:]
    }
    return ucfirst($tag_label);
}
131 |
29 |
132 /** |
30 /** |
|
31 * Query wikipedia with a normalized label or a pageid |
|
32 * return an array with the form |
|
33 * array( |
|
34 * 'new_label'=>$new_label, |
|
35 * 'alternative_label'=>$alternative_label, |
|
36 * 'status'=>$status, |
|
37 * 'wikipedia_url'=>$url, |
|
38 * 'wikipedia_alternative_url'=>$alternative_url, |
|
39 * 'pageid'=>$pageid, |
|
40 * 'alternative_pageid'=>$alternative_pageid, |
|
41 * 'dbpedia_uri'=>$dbpedia_uri, |
|
42 * 'revision_id'=> , |
|
43 * 'response'=> the original wikipedia json response |
|
44 * ) |
133 * |
45 * |
134 * TODO: Enter description here ... |
46 * @param string $tag_label_normalized |
135 * @param unknown_type $tag_label_normalized |
47 * @param bigint $page_id |
136 * @param unknown_type $page_id |
48 * @return array |
137 * @return multitype:NULL unknown |multitype:Ambigous <NULL, unknown> multitype:number mixed Ambigous <NULL, string> Ambigous <unknown, mixed> |
|
138 */ |
49 */ |
139 public static function getWikipediaInfo($tag_label_normalized, $page_id=null) |
50 public static function getWikipediaInfo($tag_label_normalized, $page_id=null) |
140 { |
51 { |
141 $params = array('action'=>'query', 'prop'=>'info|categories|langlinks', 'inprop'=>'url', 'lllimit'=>'500', 'cllimit'=>'500', 'rvprop'=>'ids', 'format'=>'json'); |
52 $params = array('action'=>'query', 'prop'=>'info|categories|langlinks', 'inprop'=>'url', 'lllimit'=>'500', 'cllimit'=>'500', 'rvprop'=>'ids', 'format'=>'json'); |
142 if($tag_label_normalized!=null){ |
53 if($tag_label_normalized!=null){ |
179 else{ |
90 else{ |
180 //return " MATCH"; |
91 //return " MATCH"; |
181 $status = Tag::$TAG_URL_STATUS_DICT["match"]; |
92 $status = Tag::$TAG_URL_STATUS_DICT["match"]; |
182 } |
93 } |
183 // In redirection, we have to get more datas by adding redirects=true to the params |
94 // In redirection, we have to get more datas by adding redirects=true to the params |
184 if($status==Tag::$TAG_URL_STATUS_DICT["redirection"]){ |
95 $alternative_label = null; |
|
96 $alternative_url = null; |
|
97 $alternative_pageid = null; |
|
98 if($status==Tag::$TAG_URL_STATUS_DICT["redirection"]) |
|
99 { |
185 $params['redirects'] = "true"; |
100 $params['redirects'] = "true"; |
186 $ar = WikiTagUtils::requestWikipedia($params); |
101 $ar = WikiTagUtils::requestWikipedia($params); |
187 $res = $ar[0]; |
102 $res = $ar[0]; |
188 $pages = $ar[1]; |
103 $pages = $ar[1]; |
189 #we know that we have at least one answer |
104 #we know that we have at least one answer |
190 if(count($pages)>1 || count($pages)==0){ |
105 if(count($pages)>1 || count($pages)==0){ |
191 return WikiTagUtils::returnNullResult($res); |
106 return WikiTagUtils::returnNullResult($res); |
192 } |
107 } |
193 // get first result |
108 // get first result |
194 $page = reset($pages); |
109 $page = reset($pages); |
|
110 $alternative_label = array_key_exists('title', $page) ? $page['title'] : null; |
|
111 $alternative_url = array_key_exists('fullurl', $page) ? $page['fullurl'] : null; |
|
112 $alternative_pageid = array_key_exists('pageid', $page) ? $page['pageid'] : null; |
195 } |
113 } |
196 |
114 |
197 $revision_id = $page['lastrevid']; |
115 $revision_id = $page['lastrevid']; |
198 |
116 |
199 // process language to extract the english label |
117 // process language to extract the english label |
212 $dbpedia_uri = null; |
130 $dbpedia_uri = null; |
213 if($english_label!=null && strpos($english_label, '#')===false){ |
131 if($english_label!=null && strpos($english_label, '#')===false){ |
214 $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label); |
132 $dbpedia_uri = WikiTagUtils::getDbpediaUri($english_label); |
215 } |
133 } |
216 |
134 |
217 $wp_response = array('new_label'=>$new_label, 'status'=>$status, 'wikipedia_url'=>$url, 'pageid'=>$pageid, 'dbpedia_uri'=>$dbpedia_uri, 'revision_id'=>$revision_id, 'response'=>$original_response); |
135 $wp_response = array( |
|
136 'new_label'=>$new_label, |
|
137 'alternative_label'=>$alternative_label, |
|
138 'status'=>$status, |
|
139 'wikipedia_url'=>$url, |
|
140 'wikipedia_alternative_url'=>$alternative_url, |
|
141 'pageid'=>$pageid, |
|
142 'alternative_pageid'=>$alternative_pageid, |
|
143 'dbpedia_uri'=>$dbpedia_uri, |
|
144 'revision_id'=>$revision_id, |
|
145 'response'=>$original_response); |
218 //return $url." <br/>RES = ".$res/*." <br/>DUMP = ".var_dump($pages)*/." <br/>COUNT = ".count($pages)." <br/>page = ".var_dump($page); |
146 //return $url." <br/>RES = ".$res/*." <br/>DUMP = ".var_dump($pages)*/." <br/>COUNT = ".count($pages)." <br/>page = ".var_dump($page); |
219 return $wp_response; |
147 return $wp_response; |
220 } |
148 } |
221 |
149 |
222 |
150 |
223 /** |
151 /** |
|
152 * build and do the request to Wikipedia. |
224 * |
153 * |
225 * TODO : Enter description here ... |
154 * @param array $params |
226 * @param unknown_type $params |
155 * @return array |
227 * @return multitype:unknown mixed |
|
228 */ |
156 */ |
229 private static function requestWikipedia($params) |
157 private static function requestWikipedia($params) |
230 { |
158 { |
231 $params_str = ''; |
159 $params_str = ''; |
232 foreach ($params as $key => $value) { |
160 foreach ($params as $key => $value) { |