|
1 <?php |
|
/**
 * Atom Syndication Format PHP Library
 *
 * @package AtomLib
 * @link http://code.google.com/p/phpatomlib/
 *
 * @author Elias Torres <elias@torrez.us>
 * @version 0.4
 * @since 2.3
 */
|
12 |
|
/**
 * Structure that stores common Atom Feed Properties
 *
 * @package AtomLib
 */
|
// AtomParser stores every parsed content/simple element on this object as an
// ad-hoc property named after the tag (e.g. ->title, ->id), so dynamic
// properties must stay allowed. On PHP < 8 the attribute line below is an
// ordinary "#" comment and has no effect.
#[AllowDynamicProperties]
class AtomFeed {
    /**
     * Stores Links
     *
     * One entry per link element; each entry is that element's attribute array.
     *
     * @var array
     * @access public
     */
    var $links = array();

    /**
     * Stores Categories
     *
     * One entry per category element; each entry is that element's attribute array.
     *
     * @var array
     * @access public
     */
    var $categories = array();

    /**
     * Stores Entries
     *
     * The parser appends one entry object here each time an Atom entry element closes.
     *
     * @var array
     * @access public
     */
    var $entries = array();
}
|
39 |
|
/**
 * Structure that stores Atom Entry Properties
 *
 * @package AtomLib
 */
|
// AtomParser stores every parsed content/simple element on this object as an
// ad-hoc property named after the tag (e.g. ->title, ->content), so dynamic
// properties must stay allowed. On PHP < 8 the attribute line below is an
// ordinary "#" comment and has no effect.
#[AllowDynamicProperties]
class AtomEntry {
    /**
     * Stores Links
     *
     * One entry per link element; each entry is that element's attribute array.
     *
     * @var array
     * @access public
     */
    var $links = array();

    /**
     * Stores Categories
     *
     * One entry per category element; each entry is that element's attribute array.
     *
     * @var array
     * @access public
     */
    var $categories = array();
}
|
59 |
|
/**
 * AtomLib Atom Parser API
 *
 * @package AtomLib
 */
|
class AtomParser {

    /** @var string Atom namespace URI used to recognise feed and entry elements. */
    var $NS = 'http://www.w3.org/2005/Atom';
    /** @var array Local names whose value is stored as array(type, serialized content). */
    var $ATOM_CONTENT_ELEMENTS = array('content','summary','title','subtitle','rights');
    /** @var array Local names whose value is stored as a plain string. */
    var $ATOM_SIMPLE_ELEMENTS = array('id','updated','published','draft');

    /** @var bool When true, _p() prints trace output and parse() keeps the raw input in $content. */
    var $debug = false;

    /** @var int Current element nesting depth (incremented on start, decremented on end). */
    var $depth = 0;
    /** @var int Spaces per depth level in debug output. */
    var $indent = 2;
    /**
     * Pieces of the content element currently being collected. The first item is
     * array(tag, depth, type label); later items are either array(tag, depth, markup)
     * for child tags or plain strings for character data. Empty when not in content.
     *
     * @var array
     */
    var $in_content = array();
    /** @var array Stack (newest first) of namespace declarations, one list per open element. */
    var $ns_contexts = array();
    /** @var array Declarations reported by start_ns() for the element about to start. */
    var $ns_decls = array();
    /** @var array Mappings that must be declared on the content child tag being serialized. */
    var $content_ns_decls = array();
    /** @var array Stack (newest first) of mappings already emitted on open content child tags. */
    var $content_ns_contexts = array();
    var $is_xhtml = false;
    var $is_html = false;
    var $is_text = true;
    /** @var bool Not read or written by any method of this class. */
    var $skipped_div = false;

    /** @var string Path or stream URL that parse() reads from. */
    var $FILE = "php://input";

    /** @var object Feed object that receives feed-level data and finished entries. */
    var $feed;
    /** @var object|null Object currently receiving parsed data (the feed, an entry, or null). */
    var $current;

    /** @var callable Maps (attribute name, escaped value) to name="value". */
    var $map_attrs_func = array(__CLASS__, 'map_attrs');
    /** @var callable Maps (index, array(prefix, uri)) to an xmlns declaration. */
    var $map_xmlns_func = array(__CLASS__, 'map_xmlns');

    /** @var string|null Text of the last error or warning raised while parse() was running. */
    var $error;
    /** @var string Raw input read by parse(); only filled when $debug is true. */
    var $content = '';

    /**
     * Creates the feed container and clears the current target.
     */
    function __construct() {
        $this->feed = new AtomFeed();
        $this->current = null;
    }

    /**
     * PHP4-style constructor, kept so code that calls it explicitly keeps working.
     */
    function AtomParser() {
        self::__construct();
    }

    /**
     * Formats one attribute as name="value". The value is expected to be escaped already.
     *
     * @param string $k Attribute name.
     * @param string $v Attribute value.
     * @return string
     */
    static function map_attrs($k, $v) {
        return "$k=\"$v\"";
    }

    /**
     * Formats one namespace mapping as an xmlns declaration.
     *
     * @param mixed $p Array index of the mapping (unused).
     * @param mixed $n array(prefix, uri); a missing or non-array mapping yields xmlns="".
     * @return string
     */
    static function map_xmlns($p, $n) {
        // The XML extension may report the default prefix as false rather than ''.
        $prefix = (is_array($n) && isset($n[0])) ? (string) $n[0] : '';
        $uri    = (is_array($n) && isset($n[1])) ? (string) $n[1] : '';

        $xd = "xmlns";
        if(strlen($prefix) > 0) {
            $xd .= ":{$prefix}";
        }
        return "{$xd}=\"{$uri}\"";
    }

    /**
     * Prints a debug line indented by the current depth; does nothing unless $debug is set.
     *
     * @param string $msg Message to print.
     */
    function _p($msg) {
        if($this->debug) {
            print str_repeat(" ", $this->depth * $this->indent) . $msg ."\n";
        }
    }

    /**
     * Error handler installed for the duration of parse(); remembers the message text.
     */
    function error_handler($log_level, $log_text, $error_file, $error_line) {
        $this->error = $log_text;
    }

    /**
     * Reads $FILE in 4096-byte chunks and feeds it to a namespace-aware XML parser.
     *
     * @return bool True on success (an empty input also counts as success);
     *              false if the input cannot be opened or is not well-formed.
     *              The reason is left in $error.
     */
    function parse() {

        set_error_handler(array($this, 'error_handler'));

        array_unshift($this->ns_contexts, array());

        $parser = xml_parser_create_ns();
        // Callable arrays bind the handlers to this object on every PHP version,
        // without xml_set_object() and string method names.
        xml_set_element_handler($parser, array($this, 'start_element'), array($this, 'end_element'));
        xml_parser_set_option($parser, XML_OPTION_CASE_FOLDING, 0);
        xml_parser_set_option($parser, XML_OPTION_SKIP_WHITE, 0);
        xml_set_character_data_handler($parser, array($this, 'cdata'));
        xml_set_default_handler($parser, array($this, '_default'));
        xml_set_start_namespace_decl_handler($parser, array($this, 'start_ns'));
        xml_set_end_namespace_decl_handler($parser, array($this, 'end_ns'));

        $this->content = '';

        $ret = true;

        $fp = fopen($this->FILE, "r");
        if($fp === false) {
            // fopen() has already raised a warning, which error_handler() stored in $error.
            $ret = false;
        } else {
            $started = false;
            $finished = false;
            while(!$finished) {
                $data = fread($fp, 4096);
                if($data === false || $data === '') {
                    if(!$started) {
                        // Nothing was read at all: leave the parser untouched.
                        break;
                    }
                    // Input ended without feof() being seen on the last chunk;
                    // still tell the parser that the document is complete.
                    $data = '';
                    $finished = true;
                } else {
                    $started = true;
                    $finished = feof($fp);
                }

                if($this->debug) $this->content .= $data;

                if(!xml_parse($parser, $data, $finished)) {
                    // __() is only available when running inside WordPress.
                    $format = function_exists('__') ? __('XML error: %s at line %d') : 'XML error: %s at line %d';
                    trigger_error(sprintf($format."\n",
                        xml_error_string(xml_get_error_code($parser)),
                        xml_get_current_line_number($parser)));
                    $ret = false;
                    break;
                }
            }
            fclose($fp);
        }

        // From PHP 8.0 the parser is an object released automatically.
        if(PHP_VERSION_ID < 80000) {
            xml_parser_free($parser);
        }

        restore_error_handler();

        return $ret;
    }

    /**
     * Start-tag handler: selects the current target object, collects links and
     * categories, opens content/simple elements, and serializes child tags found
     * inside a content element.
     *
     * @param mixed  $parser XML parser.
     * @param string $name   Element name as "namespace-uri:local" (or just "local").
     * @param array  $attrs  Attributes of the element.
     */
    function start_element($parser, $name, $attrs) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        switch($name) {
            case $this->NS . ':feed':
                $this->current = $this->feed;
                break;
            case $this->NS . ':entry':
                $this->current = new AtomEntry();
                break;
        }

        $this->_p("start_element('$name')");

        array_unshift($this->ns_contexts, $this->ns_decls);

        $this->depth++;

        if(!empty($this->in_content)) {

            $this->content_ns_decls = array();

            if($this->is_html || $this->is_text)
                trigger_error("Invalid content in element found. Content must not be of type text or html if it contains markup.");

            $attrs_prefix = array();

            // resolve prefixes for attributes
            foreach($attrs as $key => $value) {
                $with_prefix = $this->ns_to_prefix($key, true);
                $attrs_prefix[$with_prefix[1]] = $this->xml_escape($value);
            }

            $attrs_str = join(' ', array_map($this->map_attrs_func, array_keys($attrs_prefix), array_values($attrs_prefix)));
            if(strlen($attrs_str) > 0) {
                $attrs_str = " " . $attrs_str;
            }

            $with_prefix = $this->ns_to_prefix($name);

            if(!$this->is_declared_content_ns($with_prefix[0])) {
                array_push($this->content_ns_decls, $with_prefix[0]);
            }

            // One scope per open child element (even an empty one) so that
            // end_element() can pop it and siblings re-declare what they need.
            array_unshift($this->content_ns_contexts, $this->content_ns_decls);

            $xmlns_str = '';
            if(count($this->content_ns_decls) > 0) {
                $xmlns_str = join(' ', array_map($this->map_xmlns_func, array_keys($this->content_ns_decls), array_values($this->content_ns_decls)));
                if(strlen($xmlns_str) > 0) {
                    $xmlns_str = " " . $xmlns_str;
                }
            }

            array_push($this->in_content, array($tag, $this->depth, "<". $with_prefix[1] ."{$xmlns_str}{$attrs_str}" . ">"));

        } else if(in_array($tag, $this->ATOM_CONTENT_ELEMENTS) || in_array($tag, $this->ATOM_SIMPLE_ELEMENTS)) {
            $this->in_content = array();
            // Each content value is serialized on its own, so it starts with no
            // namespace declared yet.
            $this->content_ns_contexts = array();

            $has_type = array_key_exists('type', $attrs);
            $type_attr = $has_type ? $attrs['type'] : null;

            $this->is_xhtml = $type_attr === 'xhtml';
            $this->is_html = $type_attr === 'html' || $type_attr === 'text/html';
            $this->is_text = !$has_type || $type_attr === 'text';
            $type = $this->is_xhtml ? 'XHTML' : ($this->is_html ? 'HTML' : ($this->is_text ? 'TEXT' : $type_attr));

            if(array_key_exists('src', $attrs)) {
                // Out-of-line content: keep the attributes instead of collecting a body.
                if(is_object($this->current)) {
                    $this->current->$tag = $attrs;
                }
            } else {
                array_push($this->in_content, array($tag, $this->depth, $type));
            }
        } else if($tag == 'link') {
            if(is_object($this->current)) {
                array_push($this->current->links, $attrs);
            }
        } else if($tag == 'category') {
            if(is_object($this->current)) {
                array_push($this->current->categories, $attrs);
            }
        }

        $this->ns_decls = array();
    }

    /**
     * End-tag handler: finishes a content/simple element and stores its value on
     * the current object, or serializes the closing of a child tag inside content.
     * When an Atom entry closes, it is appended to the feed's entries.
     *
     * @param mixed  $parser XML parser.
     * @param string $name   Element name as "namespace-uri:local" (or just "local").
     */
    function end_element($parser, $name) {

        $name_parts = explode(":", $name);
        $tag = array_pop($name_parts);

        # if we are *in* content, then let's proceed to serialize it
        if(!empty($this->in_content)) {
            $ccount = count($this->in_content);
            $first = $this->in_content[0];
            $last = $this->in_content[$ccount-1];

            # if we are ending the original content element
            # then let's finalize the content
            if(is_array($first) && $first[0] === $tag && $first[1] === $this->depth) {
                $origtype = $first[2];
                array_shift($this->in_content);
                $newcontent = array();
                foreach($this->in_content as $c) {
                    if(is_array($c)) {
                        # serialized tag: array(tag, depth, markup)
                        array_push($newcontent, $c[2]);
                    } else if($this->is_xhtml || $this->is_text) {
                        array_push($newcontent, $this->xml_escape($c));
                    } else {
                        array_push($newcontent, $c);
                    }
                }
                $value = join('', $newcontent);
                if(is_object($this->current)) {
                    if(in_array($tag, $this->ATOM_CONTENT_ELEMENTS)) {
                        $this->current->$tag = array($origtype, $value);
                    } else {
                        $this->current->$tag = $value;
                    }
                }
                $this->in_content = array();
                $this->content_ns_contexts = array();
            } else if(is_array($last) && $last[0] === $tag && $last[1] === $this->depth) {
                # nothing followed the start tag: turn "<x ...>" into "<x .../>"
                $this->in_content[$ccount-1][2] = substr($last[2],0,-1) . "/>";
                array_shift($this->content_ns_contexts);
            } else {
                # else, just finalize the current element's content
                $endtag = $this->ns_to_prefix($name);
                array_push($this->in_content, array($tag, $this->depth, "</$endtag[1]>"));
                array_shift($this->content_ns_contexts);
            }
        }

        array_shift($this->ns_contexts);

        $this->depth--;

        if($name == ($this->NS . ':entry')) {
            array_push($this->feed->entries, $this->current);
            $this->current = null;
        }

        $this->_p("end_element('$name')");
    }

    /**
     * Namespace-declaration handler: queues the mapping for the element about to start.
     */
    function start_ns($parser, $prefix, $uri) {
        $this->_p("starting: " . $prefix . ":" . $uri);
        array_push($this->ns_decls, array($prefix,$uri));
    }

    /**
     * End-of-namespace-scope handler; only traces in debug mode.
     */
    function end_ns($parser, $prefix) {
        $this->_p("ending: #" . $prefix . "#");
    }

    /**
     * Character-data handler: appends text to the content being collected, if any.
     */
    function cdata($parser, $data) {
        $this->_p("data: #" . str_replace(array("\n"), array("\\n"), trim($data)) . "#");
        if(!empty($this->in_content)) {
            array_push($this->in_content, $data);
        }
    }

    /**
     * Default handler; intentionally ignores whatever it receives.
     */
    function _default($parser, $data) {
        # when does this get called?
    }

    /**
     * Resolves a "namespace-uri:local" name against the namespace declarations in scope.
     *
     * @param string $qname Qualified name as reported by the XML parser.
     * @param bool   $attr  True for attribute names, which never pick up the default namespace.
     * @return array array(mapping, name): mapping is array(prefix, uri) or null when
     *               nothing matched; name is "prefix:local" or just "local".
     */
    function ns_to_prefix($qname, $attr=false) {
        # split 'http://www.w3.org/1999/xhtml:div' into ('http','//www.w3.org/1999/xhtml','div')
        $components = explode(":", $qname);

        # grab the last one (e.g 'div')
        $name = array_pop($components);

        if(!empty($components)) {
            # re-join back the namespace component
            $ns = join(":",$components);
            foreach($this->ns_contexts as $context) {
                foreach($context as $mapping) {
                    if($mapping[1] == $ns && strlen((string) $mapping[0]) > 0) {
                        return array($mapping, "$mapping[0]:$name");
                    }
                }
            }
        }

        if(!$attr) {
            # elements fall back to the nearest default-namespace declaration
            foreach($this->ns_contexts as $context) {
                foreach($context as $mapping) {
                    if(strlen((string) $mapping[0]) == 0) {
                        return array($mapping, $name);
                    }
                }
            }
        }

        # no declaration applies; callers always get the array(mapping, name) shape
        return array(null, $name);
    }

    /**
     * Tells whether a mapping has already been emitted on an open ancestor tag
     * of the content being serialized.
     *
     * @param mixed $new_mapping array(prefix, uri), or null.
     * @return bool
     */
    function is_declared_content_ns($new_mapping) {
        foreach($this->content_ns_contexts as $context) {
            foreach($context as $mapping) {
                if($new_mapping == $mapping) {
                    return true;
                }
            }
        }
        return false;
    }

    /**
     * Escapes the five XML special characters. '&' is replaced first so the
     * entities produced for the other characters are not escaped again.
     *
     * @param string $string Raw text.
     * @return string
     */
    function xml_escape($string)
    {
        return str_replace(array('&','"',"'",'<','>'),
            array('&amp;','&quot;','&apos;','&lt;','&gt;'),
            $string );
    }
}
|
353 |
|
354 ?> |