diff -r 490d5cc509ed -r cf61fcea0001 wp/wp-content/plugins/wordpress-importer/parsers.php --- a/wp/wp-content/plugins/wordpress-importer/parsers.php Tue Jun 09 11:14:17 2015 +0000 +++ b/wp/wp-content/plugins/wordpress-importer/parsers.php Mon Oct 14 17:39:30 2019 +0200 @@ -114,28 +114,46 @@ // grab cats, tags and terms foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) { $t = $term_arr->children( $namespaces['wp'] ); - $categories[] = array( + $category = array( 'term_id' => (int) $t->term_id, 'category_nicename' => (string) $t->category_nicename, 'category_parent' => (string) $t->category_parent, 'cat_name' => (string) $t->cat_name, 'category_description' => (string) $t->category_description ); + + foreach ( $t->termmeta as $meta ) { + $category['termmeta'][] = array( + 'key' => (string) $meta->meta_key, + 'value' => (string) $meta->meta_value + ); + } + + $categories[] = $category; } foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) { $t = $term_arr->children( $namespaces['wp'] ); - $tags[] = array( + $tag = array( 'term_id' => (int) $t->term_id, 'tag_slug' => (string) $t->tag_slug, 'tag_name' => (string) $t->tag_name, 'tag_description' => (string) $t->tag_description ); + + foreach ( $t->termmeta as $meta ) { + $tag['termmeta'][] = array( + 'key' => (string) $meta->meta_key, + 'value' => (string) $meta->meta_value + ); + } + + $tags[] = $tag; } foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) { $t = $term_arr->children( $namespaces['wp'] ); - $terms[] = array( + $term = array( 'term_id' => (int) $t->term_id, 'term_taxonomy' => (string) $t->term_taxonomy, 'slug' => (string) $t->term_slug, @@ -143,6 +161,15 @@ 'term_name' => (string) $t->term_name, 'term_description' => (string) $t->term_description ); + + foreach ( $t->termmeta as $meta ) { + $term['termmeta'][] = array( + 'key' => (string) $meta->meta_key, + 'value' => (string) $meta->meta_value + ); + } + + $terms[] = $term; } // grab posts @@ -204,7 +231,7 @@ ); } } - + $post['comments'][] = array( 'comment_id' => (int) $comment->comment_id, 'comment_author' => (string) $comment->comment_author, @@ -324,7 +351,11 @@ if ( ! trim( $cdata ) ) return; - $this->cdata .= trim( $cdata ); + if ( false !== $this->in_tag || false !== $this->in_sub_tag ) { + $this->cdata .= $cdata; + } else { + $this->cdata .= trim( $cdata ); + } } function tag_close( $parser, $tag ) { @@ -401,16 +432,21 @@ var $terms = array(); var $base_url = ''; - function WXR_Parser_Regex() { - $this->__construct(); - } - function __construct() { $this->has_gzip = is_callable( 'gzopen' ); } function parse( $file ) { - $wxr_version = $in_post = false; + $wxr_version = $in_multiline = false; + + $multiline_content = ''; + + $multiline_tags = array( + 'item' => array( 'posts', array( $this, 'process_post' ) ), + 'wp:category' => array( 'categories', array( $this, 'process_category' ) ), + 'wp:tag' => array( 'tags', array( $this, 'process_tag' ) ), + 'wp:term' => array( 'terms', array( $this, 'process_term' ) ), + ); $fp = $this->fopen( $file, 'r' ); if ( $fp ) { @@ -425,39 +461,37 @@ $this->base_url = $url[1]; continue; } - if ( false !== strpos( $importline, '' ) ) { - preg_match( '|(.*?)|is', $importline, $category ); - $this->categories[] = $this->process_category( $category[1] ); - continue; - } - if ( false !== strpos( $importline, '' ) ) { - preg_match( '|(.*?)|is', $importline, $tag ); - $this->tags[] = $this->process_tag( $tag[1] ); - continue; - } - if ( false !== strpos( $importline, '' ) ) { - preg_match( '|(.*?)|is', $importline, $term ); - $this->terms[] = $this->process_term( $term[1] ); - continue; - } + if ( false !== strpos( $importline, '' ) ) { preg_match( '|(.*?)|is', $importline, $author ); $a = $this->process_author( $author[1] ); $this->authors[$a['author_login']] = $a; continue; } - if ( false !== strpos( $importline, '' ) ) { - $post = ''; - $in_post = true; - continue; + + foreach ( $multiline_tags as $tag => $handler ) { + // Handle multi-line tags on a singular line + if ( preg_match( '|<' . $tag . '>(.*?)|is', $importline, $matches ) ) { + $this->{$handler[0]}[] = call_user_func( $handler[1], $matches[1] ); + + } elseif ( false !== ( $pos = strpos( $importline, "<$tag>" ) ) ) { + // Take note of any content after the opening tag + $multiline_content = trim( substr( $importline, $pos + strlen( $tag ) + 2 ) ); + + // We don't want to have this line added to `$is_multiline` below. + $importline = ''; + $in_multiline = $tag; + + } elseif ( false !== ( $pos = strpos( $importline, "" ) ) ) { + $in_multiline = false; + $multiline_content .= trim( substr( $importline, 0, $pos ) ); + + $this->{$handler[0]}[] = call_user_func( $handler[1], $multiline_content ); + } } - if ( false !== strpos( $importline, '' ) ) { - $in_post = false; - $this->posts[] = $this->process_post( $post ); - continue; - } - if ( $in_post ) { - $post .= $importline . "\n"; + + if ( $in_multiline && $importline ) { + $multiline_content .= $importline . "\n"; } }