wp/wp-content/plugins/wordpress-importer/parsers.php
changeset 7 cf61fcea0001
parent 0 d970ebf37754
--- a/wp/wp-content/plugins/wordpress-importer/parsers.php	Tue Jun 09 11:14:17 2015 +0000
+++ b/wp/wp-content/plugins/wordpress-importer/parsers.php	Mon Oct 14 17:39:30 2019 +0200
@@ -114,28 +114,46 @@
 		// grab cats, tags and terms
 		foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) {
 			$t = $term_arr->children( $namespaces['wp'] );
-			$categories[] = array(
+			$category = array(
 				'term_id' => (int) $t->term_id,
 				'category_nicename' => (string) $t->category_nicename,
 				'category_parent' => (string) $t->category_parent,
 				'cat_name' => (string) $t->cat_name,
 				'category_description' => (string) $t->category_description
 			);
+
+			foreach ( $t->termmeta as $meta ) {
+				$category['termmeta'][] = array(
+					'key' => (string) $meta->meta_key,
+					'value' => (string) $meta->meta_value
+				);
+			}
+
+			$categories[] = $category;
 		}
 
 		foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) {
 			$t = $term_arr->children( $namespaces['wp'] );
-			$tags[] = array(
+			$tag = array(
 				'term_id' => (int) $t->term_id,
 				'tag_slug' => (string) $t->tag_slug,
 				'tag_name' => (string) $t->tag_name,
 				'tag_description' => (string) $t->tag_description
 			);
+
+			foreach ( $t->termmeta as $meta ) {
+				$tag['termmeta'][] = array(
+					'key' => (string) $meta->meta_key,
+					'value' => (string) $meta->meta_value
+				);
+			}
+
+			$tags[] = $tag;
 		}
 
 		foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) {
 			$t = $term_arr->children( $namespaces['wp'] );
-			$terms[] = array(
+			$term = array(
 				'term_id' => (int) $t->term_id,
 				'term_taxonomy' => (string) $t->term_taxonomy,
 				'slug' => (string) $t->term_slug,
@@ -143,6 +161,15 @@
 				'term_name' => (string) $t->term_name,
 				'term_description' => (string) $t->term_description
 			);
+
+			foreach ( $t->termmeta as $meta ) {
+				$term['termmeta'][] = array(
+					'key' => (string) $meta->meta_key,
+					'value' => (string) $meta->meta_value
+				);
+			}
+
+			$terms[] = $term;
 		}
 
 		// grab posts
@@ -204,7 +231,7 @@
 						);
 					}
 				}
-			
+
 				$post['comments'][] = array(
 					'comment_id' => (int) $comment->comment_id,
 					'comment_author' => (string) $comment->comment_author,
@@ -324,7 +351,11 @@
 		if ( ! trim( $cdata ) )
 			return;
 
-		$this->cdata .= trim( $cdata );
+		if ( false !== $this->in_tag || false !== $this->in_sub_tag ) {
+			$this->cdata .= $cdata;
+		} else {
+			$this->cdata .= trim( $cdata );
+		}
 	}
 
 	function tag_close( $parser, $tag ) {
@@ -401,16 +432,21 @@
 	var $terms = array();
 	var $base_url = '';
 
-	function WXR_Parser_Regex() {
-		$this->__construct();
-	}
-
 	function __construct() {
 		$this->has_gzip = is_callable( 'gzopen' );
 	}
 
 	function parse( $file ) {
-		$wxr_version = $in_post = false;
+		$wxr_version = $in_multiline = false;
+
+		$multiline_content = '';
+
+		$multiline_tags = array(
+			'item'        => array( 'posts', array( $this, 'process_post' ) ),
+			'wp:category' => array( 'categories', array( $this, 'process_category' ) ),
+			'wp:tag'      => array( 'tags', array( $this, 'process_tag' ) ),
+			'wp:term'     => array( 'terms', array( $this, 'process_term' ) ),
+		);
 
 		$fp = $this->fopen( $file, 'r' );
 		if ( $fp ) {
@@ -425,39 +461,37 @@
 					$this->base_url = $url[1];
 					continue;
 				}
-				if ( false !== strpos( $importline, '<wp:category>' ) ) {
-					preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category );
-					$this->categories[] = $this->process_category( $category[1] );
-					continue;
-				}
-				if ( false !== strpos( $importline, '<wp:tag>' ) ) {
-					preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag );
-					$this->tags[] = $this->process_tag( $tag[1] );
-					continue;
-				}
-				if ( false !== strpos( $importline, '<wp:term>' ) ) {
-					preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term );
-					$this->terms[] = $this->process_term( $term[1] );
-					continue;
-				}
+
 				if ( false !== strpos( $importline, '<wp:author>' ) ) {
 					preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author );
 					$a = $this->process_author( $author[1] );
 					$this->authors[$a['author_login']] = $a;
 					continue;
 				}
-				if ( false !== strpos( $importline, '<item>' ) ) {
-					$post = '';
-					$in_post = true;
-					continue;
+
+				foreach ( $multiline_tags as $tag => $handler ) {
+					// Handle multi-line tags on a singular line
+					if ( preg_match( '|<' . $tag . '>(.*?)</' . $tag . '>|is', $importline, $matches ) ) {
+						$this->{$handler[0]}[] = call_user_func( $handler[1], $matches[1] );
+
+					} elseif ( false !== ( $pos = strpos( $importline, "<$tag>" ) ) ) {
+						// Take note of any content after the opening tag
+						$multiline_content = trim( substr( $importline, $pos + strlen( $tag ) + 2 ) );
+
+						// We don't want to have this line added to `$is_multiline` below.
+						$importline        = '';
+						$in_multiline      = $tag;
+
+					} elseif ( false !== ( $pos = strpos( $importline, "</$tag>" ) ) ) {
+						$in_multiline          = false;
+						$multiline_content    .= trim( substr( $importline, 0, $pos ) );
+
+						$this->{$handler[0]}[] = call_user_func( $handler[1], $multiline_content );
+					}
 				}
-				if ( false !== strpos( $importline, '</item>' ) ) {
-					$in_post = false;
-					$this->posts[] = $this->process_post( $post );
-					continue;
-				}
-				if ( $in_post ) {
-					$post .= $importline . "\n";
+
+				if ( $in_multiline && $importline ) {
+					$multiline_content .= $importline . "\n";
 				}
 			}