112 } |
112 } |
113 |
113 |
114 // grab cats, tags and terms |
114 // grab cats, tags and terms |
115 foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) { |
115 foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) { |
116 $t = $term_arr->children( $namespaces['wp'] ); |
116 $t = $term_arr->children( $namespaces['wp'] ); |
117 $categories[] = array( |
117 $category = array( |
118 'term_id' => (int) $t->term_id, |
118 'term_id' => (int) $t->term_id, |
119 'category_nicename' => (string) $t->category_nicename, |
119 'category_nicename' => (string) $t->category_nicename, |
120 'category_parent' => (string) $t->category_parent, |
120 'category_parent' => (string) $t->category_parent, |
121 'cat_name' => (string) $t->cat_name, |
121 'cat_name' => (string) $t->cat_name, |
122 'category_description' => (string) $t->category_description |
122 'category_description' => (string) $t->category_description |
123 ); |
123 ); |
|
124 |
|
125 foreach ( $t->termmeta as $meta ) { |
|
126 $category['termmeta'][] = array( |
|
127 'key' => (string) $meta->meta_key, |
|
128 'value' => (string) $meta->meta_value |
|
129 ); |
|
130 } |
|
131 |
|
132 $categories[] = $category; |
124 } |
133 } |
125 |
134 |
126 foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) { |
135 foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) { |
127 $t = $term_arr->children( $namespaces['wp'] ); |
136 $t = $term_arr->children( $namespaces['wp'] ); |
128 $tags[] = array( |
137 $tag = array( |
129 'term_id' => (int) $t->term_id, |
138 'term_id' => (int) $t->term_id, |
130 'tag_slug' => (string) $t->tag_slug, |
139 'tag_slug' => (string) $t->tag_slug, |
131 'tag_name' => (string) $t->tag_name, |
140 'tag_name' => (string) $t->tag_name, |
132 'tag_description' => (string) $t->tag_description |
141 'tag_description' => (string) $t->tag_description |
133 ); |
142 ); |
|
143 |
|
144 foreach ( $t->termmeta as $meta ) { |
|
145 $tag['termmeta'][] = array( |
|
146 'key' => (string) $meta->meta_key, |
|
147 'value' => (string) $meta->meta_value |
|
148 ); |
|
149 } |
|
150 |
|
151 $tags[] = $tag; |
134 } |
152 } |
135 |
153 |
136 foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) { |
154 foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) { |
137 $t = $term_arr->children( $namespaces['wp'] ); |
155 $t = $term_arr->children( $namespaces['wp'] ); |
138 $terms[] = array( |
156 $term = array( |
139 'term_id' => (int) $t->term_id, |
157 'term_id' => (int) $t->term_id, |
140 'term_taxonomy' => (string) $t->term_taxonomy, |
158 'term_taxonomy' => (string) $t->term_taxonomy, |
141 'slug' => (string) $t->term_slug, |
159 'slug' => (string) $t->term_slug, |
142 'term_parent' => (string) $t->term_parent, |
160 'term_parent' => (string) $t->term_parent, |
143 'term_name' => (string) $t->term_name, |
161 'term_name' => (string) $t->term_name, |
144 'term_description' => (string) $t->term_description |
162 'term_description' => (string) $t->term_description |
145 ); |
163 ); |
|
164 |
|
165 foreach ( $t->termmeta as $meta ) { |
|
166 $term['termmeta'][] = array( |
|
167 'key' => (string) $meta->meta_key, |
|
168 'value' => (string) $meta->meta_value |
|
169 ); |
|
170 } |
|
171 |
|
172 $terms[] = $term; |
146 } |
173 } |
147 |
174 |
148 // grab posts |
175 // grab posts |
149 foreach ( $xml->channel->item as $item ) { |
176 foreach ( $xml->channel->item as $item ) { |
150 $post = array( |
177 $post = array( |
202 'key' => (string) $m->meta_key, |
229 'key' => (string) $m->meta_key, |
203 'value' => (string) $m->meta_value |
230 'value' => (string) $m->meta_value |
204 ); |
231 ); |
205 } |
232 } |
206 } |
233 } |
207 |
234 |
208 $post['comments'][] = array( |
235 $post['comments'][] = array( |
209 'comment_id' => (int) $comment->comment_id, |
236 'comment_id' => (int) $comment->comment_id, |
210 'comment_author' => (string) $comment->comment_author, |
237 'comment_author' => (string) $comment->comment_author, |
211 'comment_author_email' => (string) $comment->comment_author_email, |
238 'comment_author_email' => (string) $comment->comment_author_email, |
212 'comment_author_IP' => (string) $comment->comment_author_IP, |
239 'comment_author_IP' => (string) $comment->comment_author_IP, |
399 var $categories = array(); |
430 var $categories = array(); |
400 var $tags = array(); |
431 var $tags = array(); |
401 var $terms = array(); |
432 var $terms = array(); |
402 var $base_url = ''; |
433 var $base_url = ''; |
403 |
434 |
404 function WXR_Parser_Regex() { |
|
405 $this->__construct(); |
|
406 } |
|
407 |
|
408 function __construct() { |
435 function __construct() { |
409 $this->has_gzip = is_callable( 'gzopen' ); |
436 $this->has_gzip = is_callable( 'gzopen' ); |
410 } |
437 } |
411 |
438 |
412 function parse( $file ) { |
439 function parse( $file ) { |
413 $wxr_version = $in_post = false; |
440 $wxr_version = $in_multiline = false; |
|
441 |
|
442 $multiline_content = ''; |
|
443 |
|
444 $multiline_tags = array( |
|
445 'item' => array( 'posts', array( $this, 'process_post' ) ), |
|
446 'wp:category' => array( 'categories', array( $this, 'process_category' ) ), |
|
447 'wp:tag' => array( 'tags', array( $this, 'process_tag' ) ), |
|
448 'wp:term' => array( 'terms', array( $this, 'process_term' ) ), |
|
449 ); |
414 |
450 |
415 $fp = $this->fopen( $file, 'r' ); |
451 $fp = $this->fopen( $file, 'r' ); |
416 if ( $fp ) { |
452 if ( $fp ) { |
417 while ( ! $this->feof( $fp ) ) { |
453 while ( ! $this->feof( $fp ) ) { |
418 $importline = rtrim( $this->fgets( $fp ) ); |
454 $importline = rtrim( $this->fgets( $fp ) ); |
423 if ( false !== strpos( $importline, '<wp:base_site_url>' ) ) { |
459 if ( false !== strpos( $importline, '<wp:base_site_url>' ) ) { |
424 preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url ); |
460 preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url ); |
425 $this->base_url = $url[1]; |
461 $this->base_url = $url[1]; |
426 continue; |
462 continue; |
427 } |
463 } |
428 if ( false !== strpos( $importline, '<wp:category>' ) ) { |
464 |
429 preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category ); |
|
430 $this->categories[] = $this->process_category( $category[1] ); |
|
431 continue; |
|
432 } |
|
433 if ( false !== strpos( $importline, '<wp:tag>' ) ) { |
|
434 preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag ); |
|
435 $this->tags[] = $this->process_tag( $tag[1] ); |
|
436 continue; |
|
437 } |
|
438 if ( false !== strpos( $importline, '<wp:term>' ) ) { |
|
439 preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term ); |
|
440 $this->terms[] = $this->process_term( $term[1] ); |
|
441 continue; |
|
442 } |
|
443 if ( false !== strpos( $importline, '<wp:author>' ) ) { |
465 if ( false !== strpos( $importline, '<wp:author>' ) ) { |
444 preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author ); |
466 preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author ); |
445 $a = $this->process_author( $author[1] ); |
467 $a = $this->process_author( $author[1] ); |
446 $this->authors[$a['author_login']] = $a; |
468 $this->authors[$a['author_login']] = $a; |
447 continue; |
469 continue; |
448 } |
470 } |
449 if ( false !== strpos( $importline, '<item>' ) ) { |
471 |
450 $post = ''; |
472 foreach ( $multiline_tags as $tag => $handler ) { |
451 $in_post = true; |
473 // Handle multi-line tags on a singular line |
452 continue; |
474 if ( preg_match( '|<' . $tag . '>(.*?)</' . $tag . '>|is', $importline, $matches ) ) { |
453 } |
475 $this->{$handler[0]}[] = call_user_func( $handler[1], $matches[1] ); |
454 if ( false !== strpos( $importline, '</item>' ) ) { |
476 |
455 $in_post = false; |
477 } elseif ( false !== ( $pos = strpos( $importline, "<$tag>" ) ) ) { |
456 $this->posts[] = $this->process_post( $post ); |
478 // Take note of any content after the opening tag |
457 continue; |
479 $multiline_content = trim( substr( $importline, $pos + strlen( $tag ) + 2 ) ); |
458 } |
480 |
459 if ( $in_post ) { |
481 // We don't want to have this line added to `$is_multiline` below. |
460 $post .= $importline . "\n"; |
482 $importline = ''; |
|
483 $in_multiline = $tag; |
|
484 |
|
485 } elseif ( false !== ( $pos = strpos( $importline, "</$tag>" ) ) ) { |
|
486 $in_multiline = false; |
|
487 $multiline_content .= trim( substr( $importline, 0, $pos ) ); |
|
488 |
|
489 $this->{$handler[0]}[] = call_user_func( $handler[1], $multiline_content ); |
|
490 } |
|
491 } |
|
492 |
|
493 if ( $in_multiline && $importline ) { |
|
494 $multiline_content .= $importline . "\n"; |
461 } |
495 } |
462 } |
496 } |
463 |
497 |
464 $this->fclose($fp); |
498 $this->fclose($fp); |
465 } |
499 } |