wp/wp-content/plugins/wordpress-importer/parsers.php
changeset 7 cf61fcea0001
parent 0 d970ebf37754
equal deleted inserted replaced
6:490d5cc509ed 7:cf61fcea0001
   112 		}
   112 		}
   113 
   113 
   114 		// grab cats, tags and terms
   114 		// grab cats, tags and terms
   115 		foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) {
   115 		foreach ( $xml->xpath('/rss/channel/wp:category') as $term_arr ) {
   116 			$t = $term_arr->children( $namespaces['wp'] );
   116 			$t = $term_arr->children( $namespaces['wp'] );
   117 			$categories[] = array(
   117 			$category = array(
   118 				'term_id' => (int) $t->term_id,
   118 				'term_id' => (int) $t->term_id,
   119 				'category_nicename' => (string) $t->category_nicename,
   119 				'category_nicename' => (string) $t->category_nicename,
   120 				'category_parent' => (string) $t->category_parent,
   120 				'category_parent' => (string) $t->category_parent,
   121 				'cat_name' => (string) $t->cat_name,
   121 				'cat_name' => (string) $t->cat_name,
   122 				'category_description' => (string) $t->category_description
   122 				'category_description' => (string) $t->category_description
   123 			);
   123 			);
       
   124 
       
   125 			foreach ( $t->termmeta as $meta ) {
       
   126 				$category['termmeta'][] = array(
       
   127 					'key' => (string) $meta->meta_key,
       
   128 					'value' => (string) $meta->meta_value
       
   129 				);
       
   130 			}
       
   131 
       
   132 			$categories[] = $category;
   124 		}
   133 		}
   125 
   134 
   126 		foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) {
   135 		foreach ( $xml->xpath('/rss/channel/wp:tag') as $term_arr ) {
   127 			$t = $term_arr->children( $namespaces['wp'] );
   136 			$t = $term_arr->children( $namespaces['wp'] );
   128 			$tags[] = array(
   137 			$tag = array(
   129 				'term_id' => (int) $t->term_id,
   138 				'term_id' => (int) $t->term_id,
   130 				'tag_slug' => (string) $t->tag_slug,
   139 				'tag_slug' => (string) $t->tag_slug,
   131 				'tag_name' => (string) $t->tag_name,
   140 				'tag_name' => (string) $t->tag_name,
   132 				'tag_description' => (string) $t->tag_description
   141 				'tag_description' => (string) $t->tag_description
   133 			);
   142 			);
       
   143 
       
   144 			foreach ( $t->termmeta as $meta ) {
       
   145 				$tag['termmeta'][] = array(
       
   146 					'key' => (string) $meta->meta_key,
       
   147 					'value' => (string) $meta->meta_value
       
   148 				);
       
   149 			}
       
   150 
       
   151 			$tags[] = $tag;
   134 		}
   152 		}
   135 
   153 
   136 		foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) {
   154 		foreach ( $xml->xpath('/rss/channel/wp:term') as $term_arr ) {
   137 			$t = $term_arr->children( $namespaces['wp'] );
   155 			$t = $term_arr->children( $namespaces['wp'] );
   138 			$terms[] = array(
   156 			$term = array(
   139 				'term_id' => (int) $t->term_id,
   157 				'term_id' => (int) $t->term_id,
   140 				'term_taxonomy' => (string) $t->term_taxonomy,
   158 				'term_taxonomy' => (string) $t->term_taxonomy,
   141 				'slug' => (string) $t->term_slug,
   159 				'slug' => (string) $t->term_slug,
   142 				'term_parent' => (string) $t->term_parent,
   160 				'term_parent' => (string) $t->term_parent,
   143 				'term_name' => (string) $t->term_name,
   161 				'term_name' => (string) $t->term_name,
   144 				'term_description' => (string) $t->term_description
   162 				'term_description' => (string) $t->term_description
   145 			);
   163 			);
       
   164 
       
   165 			foreach ( $t->termmeta as $meta ) {
       
   166 				$term['termmeta'][] = array(
       
   167 					'key' => (string) $meta->meta_key,
       
   168 					'value' => (string) $meta->meta_value
       
   169 				);
       
   170 			}
       
   171 
       
   172 			$terms[] = $term;
   146 		}
   173 		}
   147 
   174 
   148 		// grab posts
   175 		// grab posts
   149 		foreach ( $xml->channel->item as $item ) {
   176 		foreach ( $xml->channel->item as $item ) {
   150 			$post = array(
   177 			$post = array(
   202 							'key' => (string) $m->meta_key,
   229 							'key' => (string) $m->meta_key,
   203 							'value' => (string) $m->meta_value
   230 							'value' => (string) $m->meta_value
   204 						);
   231 						);
   205 					}
   232 					}
   206 				}
   233 				}
   207 			
   234 
   208 				$post['comments'][] = array(
   235 				$post['comments'][] = array(
   209 					'comment_id' => (int) $comment->comment_id,
   236 					'comment_id' => (int) $comment->comment_id,
   210 					'comment_author' => (string) $comment->comment_author,
   237 					'comment_author' => (string) $comment->comment_author,
   211 					'comment_author_email' => (string) $comment->comment_author_email,
   238 					'comment_author_email' => (string) $comment->comment_author_email,
   212 					'comment_author_IP' => (string) $comment->comment_author_IP,
   239 					'comment_author_IP' => (string) $comment->comment_author_IP,
   322 
   349 
   323 	function cdata( $parser, $cdata ) {
   350 	function cdata( $parser, $cdata ) {
   324 		if ( ! trim( $cdata ) )
   351 		if ( ! trim( $cdata ) )
   325 			return;
   352 			return;
   326 
   353 
   327 		$this->cdata .= trim( $cdata );
   354 		if ( false !== $this->in_tag || false !== $this->in_sub_tag ) {
       
   355 			$this->cdata .= $cdata;
       
   356 		} else {
       
   357 			$this->cdata .= trim( $cdata );
       
   358 		}
   328 	}
   359 	}
   329 
   360 
   330 	function tag_close( $parser, $tag ) {
   361 	function tag_close( $parser, $tag ) {
   331 		switch ( $tag ) {
   362 		switch ( $tag ) {
   332 			case 'wp:comment':
   363 			case 'wp:comment':
   399 	var $categories = array();
   430 	var $categories = array();
   400 	var $tags = array();
   431 	var $tags = array();
   401 	var $terms = array();
   432 	var $terms = array();
   402 	var $base_url = '';
   433 	var $base_url = '';
   403 
   434 
   404 	function WXR_Parser_Regex() {
       
   405 		$this->__construct();
       
   406 	}
       
   407 
       
   408 	function __construct() {
   435 	function __construct() {
   409 		$this->has_gzip = is_callable( 'gzopen' );
   436 		$this->has_gzip = is_callable( 'gzopen' );
   410 	}
   437 	}
   411 
   438 
   412 	function parse( $file ) {
   439 	function parse( $file ) {
   413 		$wxr_version = $in_post = false;
   440 		$wxr_version = $in_multiline = false;
       
   441 
       
   442 		$multiline_content = '';
       
   443 
       
   444 		$multiline_tags = array(
       
   445 			'item'        => array( 'posts', array( $this, 'process_post' ) ),
       
   446 			'wp:category' => array( 'categories', array( $this, 'process_category' ) ),
       
   447 			'wp:tag'      => array( 'tags', array( $this, 'process_tag' ) ),
       
   448 			'wp:term'     => array( 'terms', array( $this, 'process_term' ) ),
       
   449 		);
   414 
   450 
   415 		$fp = $this->fopen( $file, 'r' );
   451 		$fp = $this->fopen( $file, 'r' );
   416 		if ( $fp ) {
   452 		if ( $fp ) {
   417 			while ( ! $this->feof( $fp ) ) {
   453 			while ( ! $this->feof( $fp ) ) {
   418 				$importline = rtrim( $this->fgets( $fp ) );
   454 				$importline = rtrim( $this->fgets( $fp ) );
   423 				if ( false !== strpos( $importline, '<wp:base_site_url>' ) ) {
   459 				if ( false !== strpos( $importline, '<wp:base_site_url>' ) ) {
   424 					preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url );
   460 					preg_match( '|<wp:base_site_url>(.*?)</wp:base_site_url>|is', $importline, $url );
   425 					$this->base_url = $url[1];
   461 					$this->base_url = $url[1];
   426 					continue;
   462 					continue;
   427 				}
   463 				}
   428 				if ( false !== strpos( $importline, '<wp:category>' ) ) {
   464 
   429 					preg_match( '|<wp:category>(.*?)</wp:category>|is', $importline, $category );
       
   430 					$this->categories[] = $this->process_category( $category[1] );
       
   431 					continue;
       
   432 				}
       
   433 				if ( false !== strpos( $importline, '<wp:tag>' ) ) {
       
   434 					preg_match( '|<wp:tag>(.*?)</wp:tag>|is', $importline, $tag );
       
   435 					$this->tags[] = $this->process_tag( $tag[1] );
       
   436 					continue;
       
   437 				}
       
   438 				if ( false !== strpos( $importline, '<wp:term>' ) ) {
       
   439 					preg_match( '|<wp:term>(.*?)</wp:term>|is', $importline, $term );
       
   440 					$this->terms[] = $this->process_term( $term[1] );
       
   441 					continue;
       
   442 				}
       
   443 				if ( false !== strpos( $importline, '<wp:author>' ) ) {
   465 				if ( false !== strpos( $importline, '<wp:author>' ) ) {
   444 					preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author );
   466 					preg_match( '|<wp:author>(.*?)</wp:author>|is', $importline, $author );
   445 					$a = $this->process_author( $author[1] );
   467 					$a = $this->process_author( $author[1] );
   446 					$this->authors[$a['author_login']] = $a;
   468 					$this->authors[$a['author_login']] = $a;
   447 					continue;
   469 					continue;
   448 				}
   470 				}
   449 				if ( false !== strpos( $importline, '<item>' ) ) {
   471 
   450 					$post = '';
   472 				foreach ( $multiline_tags as $tag => $handler ) {
   451 					$in_post = true;
   473 					// Handle a multi-line tag whose opening and closing tags are both on a single line
   452 					continue;
   474 					if ( preg_match( '|<' . $tag . '>(.*?)</' . $tag . '>|is', $importline, $matches ) ) {
   453 				}
   475 						$this->{$handler[0]}[] = call_user_func( $handler[1], $matches[1] );
   454 				if ( false !== strpos( $importline, '</item>' ) ) {
   476 
   455 					$in_post = false;
   477 					} elseif ( false !== ( $pos = strpos( $importline, "<$tag>" ) ) ) {
   456 					$this->posts[] = $this->process_post( $post );
   478 						// Take note of any content after the opening tag
   457 					continue;
   479 						$multiline_content = trim( substr( $importline, $pos + strlen( $tag ) + 2 ) );
   458 				}
   480 
   459 				if ( $in_post ) {
   481 						// We don't want to have this line added to `$multiline_content` below.
   460 					$post .= $importline . "\n";
   482 						$importline        = '';
       
   483 						$in_multiline      = $tag;
       
   484 
       
   485 					} elseif ( false !== ( $pos = strpos( $importline, "</$tag>" ) ) ) {
       
   486 						$in_multiline          = false;
       
   487 						$multiline_content    .= trim( substr( $importline, 0, $pos ) );
       
   488 
       
   489 						$this->{$handler[0]}[] = call_user_func( $handler[1], $multiline_content );
       
   490 					}
       
   491 				}
       
   492 
       
   493 				if ( $in_multiline && $importline ) {
       
   494 					$multiline_content .= $importline . "\n";
   461 				}
   495 				}
   462 			}
   496 			}
   463 
   497 
   464 			$this->fclose($fp);
   498 			$this->fclose($fp);
   465 		}
   499 		}