--- a/wp/wp-content/plugins/wordpress-importer/parsers.php Tue Jun 09 11:14:17 2015 +0000
+++ b/wp/wp-content/plugins/wordpress-importer/parsers.php Mon Oct 14 17:39:30 2019 +0200
@@ -114,28 +114,46 @@
// grab cats, tags and terms
foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) {
$t = $term_arr->children( $namespaces['wp'] );
- $categories[] = array(
+ $category = array(
'term_id' => (int) $t->term_id,
'category_nicename' => (string) $t->category_nicename,
'category_parent' => (string) $t->category_parent,
'cat_name' => (string) $t->cat_name,
'category_description' => (string) $t->category_description
);
+
+ foreach ( $t->termmeta as $meta ) {
+ $category['termmeta'][] = array(
+ 'key' => (string) $meta->meta_key,
+ 'value' => (string) $meta->meta_value
+ );
+ }
+
+ $categories[] = $category;
}
foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) {
$t = $term_arr->children( $namespaces['wp'] );
- $tags[] = array(
+ $tag = array(
'term_id' => (int) $t->term_id,
'tag_slug' => (string) $t->tag_slug,
'tag_name' => (string) $t->tag_name,
'tag_description' => (string) $t->tag_description
);
+
+ foreach ( $t->termmeta as $meta ) {
+ $tag['termmeta'][] = array(
+ 'key' => (string) $meta->meta_key,
+ 'value' => (string) $meta->meta_value
+ );
+ }
+
+ $tags[] = $tag;
}
foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) {
$t = $term_arr->children( $namespaces['wp'] );
- $terms[] = array(
+ $term = array(
'term_id' => (int) $t->term_id,
'term_taxonomy' => (string) $t->term_taxonomy,
'slug' => (string) $t->term_slug,
@@ -143,6 +161,15 @@
'term_name' => (string) $t->term_name,
'term_description' => (string) $t->term_description
);
+
+ foreach ( $t->termmeta as $meta ) {
+ $term['termmeta'][] = array(
+ 'key' => (string) $meta->meta_key,
+ 'value' => (string) $meta->meta_value
+ );
+ }
+
+ $terms[] = $term;
}
// grab posts
@@ -204,7 +231,7 @@
);
}
}
-
+
$post['comments'][] = array(
'comment_id' => (int) $comment->comment_id,
'comment_author' => (string) $comment->comment_author,
@@ -324,7 +351,11 @@
if ( ! trim( $cdata ) )
return;
- $this->cdata .= trim( $cdata );
+ if ( false !== $this->in_tag || false !== $this->in_sub_tag ) {
+ $this->cdata .= $cdata;
+ } else {
+ $this->cdata .= trim( $cdata );
+ }
}
function tag_close( $parser, $tag ) {
@@ -401,16 +432,21 @@
var $terms = array();
var $base_url = '';
- function WXR_Parser_Regex() {
- $this->__construct();
- }
-
function __construct() {
$this->has_gzip = is_callable( 'gzopen' );
}
function parse( $file ) {
- $wxr_version = $in_post = false;
+ $wxr_version = $in_multiline = false;
+
+ $multiline_content = '';
+
+ $multiline_tags = array(
+ 'item' => array( 'posts', array( $this, 'process_post' ) ),
+ 'wp:category' => array( 'categories', array( $this, 'process_category' ) ),
+ 'wp:tag' => array( 'tags', array( $this, 'process_tag' ) ),
+ 'wp:term' => array( 'terms', array( $this, 'process_term' ) ),
+ );
$fp = $this->fopen( $file, 'r' );
if ( $fp ) {
@@ -425,39 +461,37 @@
$this->base_url = $url[1];
continue;
}
- if ( false !== strpos( $importline, '<wp:category>' ) ) {
- preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category );
- $this->categories[] = $this->process_category( $category[1] );
- continue;
- }
- if ( false !== strpos( $importline, '<wp:tag>' ) ) {
- preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag );
- $this->tags[] = $this->process_tag( $tag[1] );
- continue;
- }
- if ( false !== strpos( $importline, '<wp:term>' ) ) {
- preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term );
- $this->terms[] = $this->process_term( $term[1] );
- continue;
- }
+
if ( false !== strpos( $importline, '<wp:author>' ) ) {
preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author );
$a = $this->process_author( $author[1] );
$this->authors[$a['author_login']] = $a;
continue;
}
- if ( false !== strpos( $importline, '<item>' ) ) {
- $post = '';
- $in_post = true;
- continue;
+
+ foreach ( $multiline_tags as $tag => $handler ) {
+ // Handle multi-line tags on a singular line
+ if ( preg_match( '|<' . $tag . '>(.*?)</' . $tag . '>|is', $importline, $matches ) ) {
+ $this->{$handler[0]}[] = call_user_func( $handler[1], $matches[1] );
+
+ } elseif ( false !== ( $pos = strpos( $importline, "<$tag>" ) ) ) {
+ // Take note of any content after the opening tag
+ $multiline_content = trim( substr( $importline, $pos + strlen( $tag ) + 2 ) );
+
+ // We don't want to have this line added to `$is_multiline` below.
+ $importline = '';
+ $in_multiline = $tag;
+
+ } elseif ( false !== ( $pos = strpos( $importline, "</$tag>" ) ) ) {
+ $in_multiline = false;
+ $multiline_content .= trim( substr( $importline, 0, $pos ) );
+
+ $this->{$handler[0]}[] = call_user_func( $handler[1], $multiline_content );
+ }
}
- if ( false !== strpos( $importline, '</item>' ) ) {
- $in_post = false;
- $this->posts[] = $this->process_post( $post );
- continue;
- }
- if ( $in_post ) {
- $post .= $importline . "\n";
+
+ if ( $in_multiline && $importline ) {
+ $multiline_content .= $importline . "\n";
}
}