author | ymh <ymh.work@gmail.com> |
Tue, 15 Dec 2020 13:49:49 +0100 | |
changeset 16 | a86126ab1dd4 |
parent 7 | cf61fcea0001 |
child 18 | be944660c56a |
permissions | -rw-r--r-- |
0 | 1 |
<?php
/**
 * Atom Syndication Format PHP Library
 *
 * @package AtomLib
 * @link http://code.google.com/p/phpatomlib/
 *
 * @author Elias Torres <elias@torrez.us>
 * @version 0.4
 * @since 2.3.0
 */
12 |
||
13 |
/**
 * Structure that stores common Atom Feed properties.
 *
 * Only the three collections below are declared. AtomParser additionally
 * assigns one property per parsed element, named after the element
 * (`$this->current->$tag = ...`), so dynamic properties must stay permitted.
 * The attribute line below is read as a plain `#` comment before PHP 8.
 *
 * @package AtomLib
 */
#[AllowDynamicProperties]
class AtomFeed {
	/**
	 * Stores Links
	 *
	 * AtomParser appends the attribute array of each `link` element here.
	 *
	 * @var array
	 * @access public
	 */
	public $links = array();

	/**
	 * Stores Categories
	 *
	 * AtomParser appends the attribute array of each `category` element here.
	 *
	 * @var array
	 * @access public
	 */
	public $categories = array();

	/**
	 * Stores Entries
	 *
	 * AtomParser appends each completed AtomEntry here.
	 *
	 * @var array
	 * @access public
	 */
	public $entries = array();
}
|
39 |
||
40 |
/**
 * Structure that stores Atom Entry properties.
 *
 * Only the two collections below are declared. AtomParser additionally
 * assigns one property per parsed element, named after the element
 * (`$this->current->$tag = ...`), so dynamic properties must stay permitted.
 * The attribute line below is read as a plain `#` comment before PHP 8.
 *
 * @package AtomLib
 */
#[AllowDynamicProperties]
class AtomEntry {
	/**
	 * Stores Links
	 *
	 * AtomParser appends the attribute array of each `link` element here.
	 *
	 * @var array
	 * @access public
	 */
	public $links = array();

	/**
	 * Stores Categories
	 *
	 * AtomParser appends the attribute array of each `category` element here.
	 *
	 * @var array
	 * @access public
	 */
	public $categories = array();
}
|
59 |
||
60 |
/**
 * AtomLib Atom Parser API
 *
 * Streams the document named by $FILE through PHP's namespace-aware XML
 * parser and fills $feed (an AtomFeed) with the feed's elements and entries.
 * Text constructs are stored as array( TYPE, serialized content ); markup
 * found inside a construct is re-serialized with its namespace declarations.
 *
 * @package AtomLib
 */
class AtomParser {

	/** @var string Atom namespace URI; element names arrive as "{URI}:{local name}". */
	public $NS = 'http://www.w3.org/2005/Atom';

	/** @var array Elements stored as array( TYPE, content ). */
	public $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');

	/** @var array Elements stored as a plain string. */
	public $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft');

	/** @var bool When true, _p() prints a trace and parse() keeps the raw input in $content. */
	public $debug = false;

	/** @var int Current element nesting depth. */
	public $depth = 0;

	/** @var int Spaces per nesting level in the debug trace. */
	public $indent = 2;

	/**
	 * Buffer for the construct currently being read: the first item describes
	 * the construct itself, later items are serialized tags (arrays) or
	 * character data (strings). Empty while no construct is open.
	 *
	 * Initialized to an array because end_element() can run before any
	 * construct has been opened.
	 *
	 * @var array
	 */
	public $in_content = array();

	/** @var array Stack (nearest first) of the namespace declarations of each open element. */
	public $ns_contexts = array();

	/** @var array Namespace declarations reported since the last start tag. */
	public $ns_decls = array();

	/** @var array Namespace mappings that must be declared on the tag being serialized. */
	public $content_ns_decls = array();

	/** @var array Stack of namespace mappings already emitted into serialized content. */
	public $content_ns_contexts = array();

	/** @var bool Whether the open construct has type "xhtml". */
	public $is_xhtml = false;

	/** @var bool Whether the open construct has type "html" or "text/html". */
	public $is_html = false;

	/** @var bool Whether the open construct has type "text" or no type attribute. */
	public $is_text = true;

	/** @var bool Not referenced by any method of this class. */
	public $skipped_div = false;

	/** @var string Stream that parse() reads the document from. */
	public $FILE = "php://input";

	/** @var AtomFeed Parse result. */
	public $feed;

	/** @var AtomFeed|AtomEntry|null Object that receives the elements currently being read. */
	public $current;

	/** @var callable Callback turning an attribute name/value pair into markup. */
	public $map_attrs_func;

	/** @var callable Callback turning a namespace mapping into an xmlns declaration. */
	public $map_xmlns_func;

	/** @var string|null Text of the last PHP error captured while parse() was running. */
	public $error;

	/** @var string|null Raw input accumulated by parse() when $debug is true. */
	public $content;

	/**
	 * PHP5 constructor.
	 */
	public function __construct() {
		$this->feed = new AtomFeed();
		$this->current = null;
		$this->map_attrs_func = array( __CLASS__, 'map_attrs' );
		$this->map_xmlns_func = array( __CLASS__, 'map_xmlns' );
	}

	/**
	 * PHP4 constructor.
	 */
	public function AtomParser() {
		self::__construct();
	}

	/**
	 * Map attributes to key="val"
	 *
	 * @param string $k Key
	 * @param string $v Value
	 * @return string
	 */
	public static function map_attrs($k, $v) {
		return "$k=\"$v\"";
	}

	/**
	 * Map XML namespace to string.
	 *
	 * @param indexish $p XML Namespace element index
	 * @param array $n Two-element array pair. [ 0 => {namespace}, 1 => {url} ]
	 * @return string 'xmlns="{url}"' or 'xmlns:{namespace}="{url}"'
	 */
	public static function map_xmlns($p, $n) {
		$xd = "xmlns";
		if( 0 < strlen($n[0]) ) {
			$xd .= ":{$n[0]}";
		}
		return "{$xd}=\"{$n[1]}\"";
	}

	/**
	 * Print a debug trace line, indented by the current depth.
	 *
	 * Does nothing unless $debug is true.
	 *
	 * @param string $msg Message to print.
	 */
	public function _p($msg) {
		if($this->debug) {
			print str_repeat(" ", $this->depth * $this->indent) . $msg ."\n";
		}
	}

	/**
	 * Error handler installed for the duration of parse().
	 *
	 * Records the message in $error. It returns nothing, so PHP's standard
	 * error handler does not run for the captured error.
	 *
	 * @param int    $log_level  Error level.
	 * @param string $log_text   Error message.
	 * @param string $error_file File in which the error was raised.
	 * @param int    $error_line Line on which the error was raised.
	 */
	public function error_handler($log_level, $log_text, $error_file, $error_line) {
		$this->error = $log_text;
	}

	/**
	 * Read $FILE and populate $feed.
	 *
	 * PHP errors raised while parsing are routed to error_handler(); the
	 * previous handler is restored on every exit path.
	 *
	 * @return bool True on success; false if the XML extension is missing,
	 *              the stream cannot be opened, or the document is not
	 *              well-formed.
	 */
	public function parse() {

		set_error_handler(array($this, 'error_handler'));

		array_unshift($this->ns_contexts, array());

		if ( ! function_exists( 'xml_parser_create_ns' ) ) {
			trigger_error( __( "PHP's XML extension is not available. Please contact your hosting provider to enable PHP's XML extension." ) );
			// Do not leave our handler installed for the rest of the request.
			restore_error_handler();
			return false;
		}

		$parser = xml_parser_create_ns();
		// Handlers are passed as callables rather than through
		// xml_set_object(), which is deprecated as of PHP 8.4.
		xml_set_element_handler($parser, array($this, 'start_element'), array($this, 'end_element'));
		xml_parser_set_option($parser,XML_OPTION_CASE_FOLDING,0);
		xml_parser_set_option($parser,XML_OPTION_SKIP_WHITE,0);
		xml_set_character_data_handler($parser, array($this, 'cdata'));
		xml_set_default_handler($parser, array($this, '_default'));
		xml_set_start_namespace_decl_handler($parser, array($this, 'start_ns'));
		xml_set_end_namespace_decl_handler($parser, array($this, 'end_ns'));

		$this->content = '';

		$ret = true;

		$fp = fopen($this->FILE, "r");
		if ( false === $fp ) {
			// fopen() has already raised a warning, captured in $this->error.
			$ret = false;
		} else {
			while ($data = fread($fp, 4096)) {
				if($this->debug) $this->content .= $data;

				if(!xml_parse($parser, $data, feof($fp))) {
					/* translators: 1: Error message, 2: Line number. */
					trigger_error(sprintf(__('XML Error: %1$s at line %2$s')."\n",
						xml_error_string(xml_get_error_code($parser)),
						xml_get_current_line_number($parser)));
					$ret = false;
					break;
				}
			}
			fclose($fp);
		}

		// xml_parser_free() has no effect since PHP 8.0, where parsers are objects.
		if ( PHP_VERSION_ID < 80000 ) {
			xml_parser_free($parser);
		}
		unset($parser);

		restore_error_handler();

		return $ret;
	}

	/**
	 * Start-tag handler.
	 *
	 * Selects the target object for feed/entry, opens a text construct, records
	 * link/category attributes, or - inside an open construct - serializes
	 * the start tag into the content buffer.
	 *
	 * @param mixed  $parser XML parser that invoked the handler.
	 * @param string $name   Element name, "{namespace URI}:{local name}" when namespaced.
	 * @param array  $attrs  Attribute name => value.
	 */
	public function start_element($parser, $name, $attrs) {

		// array_pop() takes its argument by reference, so it needs a variable.
		$name_parts = explode(":", $name);
		$tag = array_pop($name_parts);

		if ($name === $this->NS . ':feed') {
			$this->current = $this->feed;
		} elseif ($name === $this->NS . ':entry') {
			$this->current = new AtomEntry();
		}

		$this->_p("start_element('$name')");

		array_unshift($this->ns_contexts, $this->ns_decls);

		$this->depth++;

		if(!empty($this->in_content)) {

			$this->content_ns_decls = array();

			if($this->is_html || $this->is_text)
				trigger_error("Invalid content in element found. Content must not be of type text or html if it contains markup.");

			$attrs_prefix = array();

			// resolve prefixes for attributes
			foreach($attrs as $key => $value) {
				$with_prefix = $this->ns_to_prefix($key, true);
				$attrs_prefix[$with_prefix[1]] = $this->xml_escape($value);
			}

			$attrs_str = implode(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
			if(strlen($attrs_str) > 0) {
				$attrs_str = " " . $attrs_str;
			}

			$with_prefix = $this->ns_to_prefix($name);

			if(!$this->is_declared_content_ns($with_prefix[0])) {
				$this->content_ns_decls[] = $with_prefix[0];
			}

			$xmlns_str = '';
			if(count($this->content_ns_decls) > 0) {
				array_unshift($this->content_ns_contexts, $this->content_ns_decls);
				$xmlns_str .= implode(' ', array_map($this->map_xmlns_func, array_keys($this->content_ns_contexts[0]), array_values($this->content_ns_contexts[0])));
				if(strlen($xmlns_str) > 0) {
					$xmlns_str = " " . $xmlns_str;
				}
			}

			$this->in_content[] = array($tag, $this->depth, "<". $with_prefix[1] ."{$xmlns_str}{$attrs_str}" . ">");

		} elseif(in_array($tag, $this->ATOM_CONTENT_ELEMENTS, true) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS, true)) {
			// The type attribute is optional; read it once, without raising an
			// undefined-index error that would end up in $this->error.
			$has_type  = array_key_exists('type', $attrs);
			$type_attr = $has_type ? $attrs['type'] : null;

			$this->in_content = array();
			$this->is_xhtml = $type_attr === 'xhtml';
			$this->is_html = $type_attr === 'html' || $type_attr === 'text/html';
			$this->is_text = !$has_type || $type_attr === 'text';
			$type = $this->is_xhtml ? 'XHTML' : ($this->is_html ? 'HTML' : ($this->is_text ? 'TEXT' : $type_attr));

			if(array_key_exists('src', $attrs)) {
				// Out-of-line content: keep the attributes instead of a body.
				$this->current->$tag = $attrs;
			} else {
				$this->in_content[] = array($tag, $this->depth, $type);
			}
		} elseif($tag == 'link') {
			array_push($this->current->links, $attrs);
		} elseif($tag == 'category') {
			array_push($this->current->categories, $attrs);
		}

		$this->ns_decls = array();
	}

	/**
	 * End-tag handler.
	 *
	 * Closes the open construct and stores its content on $current, or
	 * serializes the end tag into the content buffer; a finished entry is
	 * appended to the feed.
	 *
	 * @param mixed  $parser XML parser that invoked the handler.
	 * @param string $name   Element name, "{namespace URI}:{local name}" when namespaced.
	 */
	public function end_element($parser, $name) {

		// array_pop() takes its argument by reference, so it needs a variable.
		$name_parts = explode(":", $name);
		$tag = array_pop($name_parts);

		// if we are *in* content, then let's proceed to serialize it
		if(!empty($this->in_content)) {
			$ccount = count($this->in_content);
			$first  = $this->in_content[0];
			$last   = $this->in_content[$ccount-1];

			// if we are ending the original content element
			// then let's finalize the content
			if($first[0] === $tag && $first[1] === $this->depth) {
				$origtype = $first[2];
				array_shift($this->in_content);
				$newcontent = array();
				foreach($this->in_content as $c) {
					// Buffer items are either serialized tags (3-element
					// arrays) or character data (strings).
					if(is_array($c) && count($c) == 3) {
						$newcontent[] = $c[2];
					} elseif($this->is_xhtml || $this->is_text) {
						$newcontent[] = $this->xml_escape($c);
					} else {
						$newcontent[] = $c;
					}
				}
				if(in_array($tag, $this->ATOM_CONTENT_ELEMENTS, true)) {
					$this->current->$tag = array($origtype, implode('',$newcontent));
				} else {
					$this->current->$tag = implode('',$newcontent);
				}
				$this->in_content = array();
			} elseif(is_array($last) && $last[0] === $tag && $last[1] === $this->depth) {
				// The matching start tag is the last thing buffered, so the
				// element is empty: turn "<x ...>" into "<x .../>".
				$this->in_content[$ccount-1][2] = substr($last[2],0,-1) . "/>";
			} else {
				// else, just finalize the current element's content
				$endtag = $this->ns_to_prefix($name);
				$this->in_content[] = array($tag, $this->depth, "</$endtag[1]>");
			}
		}

		array_shift($this->ns_contexts);

		$this->depth--;

		if($name == ($this->NS . ':entry')) {
			array_push($this->feed->entries, $this->current);
			$this->current = null;
		}

		$this->_p("end_element('$name')");
	}

	/**
	 * Namespace-declaration start handler: queues the mapping for the next start tag.
	 *
	 * @param mixed  $parser XML parser that invoked the handler.
	 * @param string $prefix Declared prefix.
	 * @param string $uri    Namespace URI.
	 */
	public function start_ns($parser, $prefix, $uri) {
		$this->_p("starting: " . $prefix . ":" . $uri);
		$this->ns_decls[] = array($prefix,$uri);
	}

	/**
	 * Namespace-declaration end handler: only traces.
	 *
	 * @param mixed  $parser XML parser that invoked the handler.
	 * @param string $prefix Prefix going out of scope.
	 */
	public function end_ns($parser, $prefix) {
		$this->_p("ending: #" . $prefix . "#");
	}

	/**
	 * Character-data handler: buffers the data while a construct is open.
	 *
	 * @param mixed  $parser XML parser that invoked the handler.
	 * @param string $data   Character data.
	 */
	public function cdata($parser, $data) {
		$this->_p("data: #" . str_replace(array("\n"), array("\\n"), trim($data)) . "#");
		if(!empty($this->in_content)) {
			$this->in_content[] = $data;
		}
	}

	/**
	 * Default handler: intentionally ignores whatever it is given.
	 *
	 * @param mixed  $parser XML parser that invoked the handler.
	 * @param string $data   Unhandled data.
	 */
	public function _default($parser, $data) {
		// when does this gets called?
	}

	/**
	 * Translate a "{namespace URI}:{local name}" name into a prefixed name.
	 *
	 * Looks for a non-empty prefix bound to the URI in $ns_contexts. Failing
	 * that, attributes are returned unprefixed with no mapping, and elements
	 * are returned unprefixed together with the nearest default-namespace
	 * mapping.
	 *
	 * @param string $qname Name as reported by the parser.
	 * @param bool   $attr  Whether $qname is an attribute name.
	 * @return array [ 0 => mapping ( [ prefix, uri ] ) or null, 1 => name to serialize ]
	 */
	public function ns_to_prefix($qname, $attr=false) {
		// split 'http://www.w3.org/1999/xhtml:div' into ('http','//www.w3.org/1999/xhtml','div')
		$components = explode(":", $qname);

		// grab the last one (e.g 'div')
		$name = array_pop($components);

		if(!empty($components)) {
			// re-join back the namespace component
			$ns = implode(":",$components);
			foreach($this->ns_contexts as $context) {
				foreach($context as $mapping) {
					if($mapping[1] == $ns && strlen($mapping[0]) > 0) {
						return array($mapping, "$mapping[0]:$name");
					}
				}
			}
		}

		if($attr) {
			return array(null, $name);
		}

		foreach($this->ns_contexts as $context) {
			foreach($context as $mapping) {
				if(strlen($mapping[0]) == 0) {
					return array($mapping, $name);
				}
			}
		}

		// No default namespace is in scope. Return an empty mapping (which
		// map_xmlns() renders as xmlns="") so callers can always index the
		// result instead of receiving null.
		return array(array('', ''), $name);
	}

	/**
	 * Tell whether a namespace mapping was already emitted into serialized content.
	 *
	 * @param mixed $new_mapping Mapping to look for in $content_ns_contexts.
	 * @return bool
	 */
	public function is_declared_content_ns($new_mapping) {
		foreach($this->content_ns_contexts as $context) {
			foreach($context as $mapping) {
				if($new_mapping == $mapping) {
					return true;
				}
			}
		}
		return false;
	}

	/**
	 * Escape the five XML special characters.
	 *
	 * '&' is listed first so the ampersands of the other entities are not
	 * escaped a second time.
	 *
	 * @param string $string Raw text.
	 * @return string Text safe for element content and quoted attribute values.
	 */
	public function xml_escape($string)
	{
		return str_replace(array('&','"',"'",'<','>'),
			array('&amp;','&quot;','&apos;','&lt;','&gt;'),
			$string );
	}
}