wp/wp-includes/wp-diff.php
changeset 0 d970ebf37754
child 5 5e2f62d02dcd
equal deleted inserted replaced
-1:000000000000 0:d970ebf37754
       
     1 <?php
       
     2 /**
       
     3  * WordPress Diff bastard child of old MediaWiki Diff Formatter.
       
     4  *
       
     5  * Basically all that remains is the table structure and some method names.
       
     6  *
       
     7  * @package WordPress
       
     8  * @subpackage Diff
       
     9  */
       
    10 
       
    11 if ( !class_exists( 'Text_Diff' ) ) {
       
    12 	/** Text_Diff class */
       
    13 	require( dirname(__FILE__).'/Text/Diff.php' );
       
    14 	/** Text_Diff_Renderer class */
       
    15 	require( dirname(__FILE__).'/Text/Diff/Renderer.php' );
       
    16 	/** Text_Diff_Renderer_inline class */
       
    17 	require( dirname(__FILE__).'/Text/Diff/Renderer/inline.php' );
       
    18 }
       
    19 
       
    20 /**
       
    21  * Table renderer to display the diff lines.
       
    22  *
       
    23  * @since 2.6.0
       
    24  * @uses Text_Diff_Renderer Extends
       
    25  */
       
    26 class WP_Text_Diff_Renderer_Table extends Text_Diff_Renderer {
       
    27 
       
    28 	/**
       
    29 	 * @see Text_Diff_Renderer::_leading_context_lines
       
    30 	 * @var int
       
    31 	 * @access protected
       
    32 	 * @since 2.6.0
       
    33 	 */
       
    34 	var $_leading_context_lines  = 10000;
       
    35 
       
    36 	/**
       
    37 	 * @see Text_Diff_Renderer::_trailing_context_lines
       
    38 	 * @var int
       
    39 	 * @access protected
       
    40 	 * @since 2.6.0
       
    41 	 */
       
    42 	var $_trailing_context_lines = 10000;
       
    43 
       
    44 	/**
       
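    45 	 * Ratio of changed text to total text above which a matched pair of lines is considered too different to get word-level <ins>/<del> highlighting.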
       
    46 	 *
       
    47 	 * @var float
       
    48 	 * @access protected
       
    49 	 * @since 2.6.0
       
    50 	 */
       
    51 	var $_diff_threshold = 0.6;
       
    52 
       
    53 	/**
       
    54 	 * Inline display helper object name.
       
    55 	 *
       
    56 	 * @var string
       
    57 	 * @access protected
       
    58 	 * @since 2.6.0
       
    59 	 */
       
    60 	var $inline_diff_renderer = 'WP_Text_Diff_Renderer_inline';
       
    61 
       
    62 	/**
       
    63 	 * Should we show the split view or not
       
    64 	 *
       
    65 	 * @var bool
       
    66 	 * @access protected
       
    67 	 * @since 3.6.0
       
    68 	 */
       
    69 	var $_show_split_view = true;
       
    70 
       
    71 	/**
       
    72 	 * Constructor - Call parent constructor with params array.
       
    73 	 *
       
    74 	 * This will set class properties based on the key value pairs in the array.
       
    75 	 *
       
    76 	 * @since 2.6.0
       
    77 	 *
       
    78 	 * @param array $params
       
    79 	 */
       
    80 	function __construct( $params = array() ) {
       
    81 		parent::__construct( $params );
       
    82 		if ( isset( $params[ 'show_split_view' ] ) )
       
    83 			$this->_show_split_view = $params[ 'show_split_view' ];
       
    84 	}
       
    85 
       
    86 	/**
       
    87 	 * @ignore
       
    88 	 *
       
    89 	 * @param string $header
       
    90 	 * @return string
       
    91 	 */
       
    92 	function _startBlock( $header ) {
       
    93 		return '';
       
    94 	}
       
    95 
       
    96 	/**
       
    97 	 * @ignore
       
    98 	 *
       
    99 	 * @param array $lines
       
   100 	 * @param string $prefix
       
   101 	 */
       
   102 	function _lines( $lines, $prefix=' ' ) {
       
   103 	}
       
   104 
       
   105 	/**
       
   106 	 * @ignore
       
   107 	 *
       
   108 	 * @param string $line HTML-escape the value.
       
   109 	 * @return string
       
   110 	 */
       
   111 	function addedLine( $line ) {
       
   112 		return "<td class='diff-addedline'>{$line}</td>";
       
   113 
       
   114 	}
       
   115 
       
   116 	/**
       
   117 	 * @ignore
       
   118 	 *
       
   119 	 * @param string $line HTML-escape the value.
       
   120 	 * @return string
       
   121 	 */
       
   122 	function deletedLine( $line ) {
       
   123 		return "<td class='diff-deletedline'>{$line}</td>";
       
   124 	}
       
   125 
       
   126 	/**
       
   127 	 * @ignore
       
   128 	 *
       
   129 	 * @param string $line HTML-escape the value.
       
   130 	 * @return string
       
   131 	 */
       
   132 	function contextLine( $line ) {
       
   133 		return "<td class='diff-context'>{$line}</td>";
       
   134 	}
       
   135 
       
   136 	/**
       
   137 	 * @ignore
       
   138 	 *
       
   139 	 * @return string
       
   140 	 */
       
   141 	function emptyLine() {
       
   142 		return '<td>&nbsp;</td>';
       
   143 	}
       
   144 
       
   145 	/**
       
   146 	 * @ignore
       
   147 	 * @access private
       
   148 	 *
       
   149 	 * @param array $lines
       
   150 	 * @param bool $encode
       
   151 	 * @return string
       
   152 	 */
       
   153 	function _added( $lines, $encode = true ) {
       
   154 		$r = '';
       
   155 		foreach ($lines as $line) {
       
   156 			if ( $encode )
       
   157 				$line = htmlspecialchars( $line );
       
   158 			if ( $this->_show_split_view ) {
       
   159 				$r .= '<tr>' . $this->emptyLine() . $this->emptyLine() . $this->addedLine( $line ) . "</tr>\n";
       
   160 			} else {
       
   161 				$r .= '<tr>' . $this->addedLine( $line ) . "</tr>\n";
       
   162 			}
       
   163 		}
       
   164 		return $r;
       
   165 	}
       
   166 
       
   167 	/**
       
   168 	 * @ignore
       
   169 	 * @access private
       
   170 	 *
       
   171 	 * @param array $lines
       
   172 	 * @param bool $encode
       
   173 	 * @return string
       
   174 	 */
       
   175 	function _deleted( $lines, $encode = true ) {
       
   176 		$r = '';
       
   177 		foreach ($lines as $line) {
       
   178 			if ( $encode )
       
   179 				$line = htmlspecialchars( $line );
       
   180 			if ( $this->_show_split_view ) {
       
   181 				$r .= '<tr>' . $this->deletedLine( $line ) . $this->emptyLine() . $this->emptyLine() . "</tr>\n";
       
   182 			} else {
       
   183 				$r .= '<tr>' . $this->deletedLine( $line ) . "</tr>\n";
       
   184 			}
       
   185 
       
   186 		}
       
   187 		return $r;
       
   188 	}
       
   189 
       
   190 	/**
       
   191 	 * @ignore
       
   192 	 * @access private
       
   193 	 *
       
   194 	 * @param array $lines
       
   195 	 * @param bool $encode
       
   196 	 * @return string
       
   197 	 */
       
   198 	function _context( $lines, $encode = true ) {
       
   199 		$r = '';
       
   200 		foreach ($lines as $line) {
       
   201 			if ( $encode )
       
   202 				$line = htmlspecialchars( $line );
       
   203 			if (  $this->_show_split_view ) {
       
   204 				$r .= '<tr>' . $this->contextLine( $line ) . $this->emptyLine() . $this->contextLine( $line )  . "</tr>\n";
       
   205 			} else {
       
   206 				$r .= '<tr>' . $this->contextLine( $line ) . "</tr>\n";
       
   207 			}
       
   208 		}
       
   209 		return $r;
       
   210 	}
       
   211 
       
   212 	/**
       
   213 	 * Process changed lines to do word-by-word diffs for extra highlighting.
       
   214 	 *
       
   215 	 * (TRAC style) sometimes these lines can actually be deleted or added rows.
       
   216 	 * We do additional processing to figure that out
       
   217 	 *
       
   218 	 * @access private
       
   219 	 * @since 2.6.0
       
   220 	 *
       
   221 	 * @param array $orig
       
   222 	 * @param array $final
       
   223 	 * @return string
       
   224 	 */
       
   225 	function _changed( $orig, $final ) {
       
   226 		$r = '';
       
   227 
       
   228 		// Does the aforementioned additional processing
       
   229 		// *_matches tell what rows are "the same" in orig and final. Those pairs will be diffed to get word changes
       
   230 		//	match is numeric: an index in other column
       
   231 		//	match is 'X': no match. It is a new row
       
   232 		// *_rows are column vectors for the orig column and the final column.
       
   233 		//	row >= 0: an indix of the $orig or $final array
       
   234 		//	row  < 0: a blank row for that column
       
   235 		list($orig_matches, $final_matches, $orig_rows, $final_rows) = $this->interleave_changed_lines( $orig, $final );
       
   236 
       
   237 		// These will hold the word changes as determined by an inline diff
       
   238 		$orig_diffs  = array();
       
   239 		$final_diffs = array();
       
   240 
       
   241 		// Compute word diffs for each matched pair using the inline diff
       
   242 		foreach ( $orig_matches as $o => $f ) {
       
   243 			if ( is_numeric($o) && is_numeric($f) ) {
       
   244 				$text_diff = new Text_Diff( 'auto', array( array($orig[$o]), array($final[$f]) ) );
       
   245 				$renderer = new $this->inline_diff_renderer;
       
   246 				$diff = $renderer->render( $text_diff );
       
   247 
       
   248 				// If they're too different, don't include any <ins> or <dels>
       
   249 				if ( $diff_count = preg_match_all( '!(<ins>.*?</ins>|<del>.*?</del>)!', $diff, $diff_matches ) ) {
       
   250 					// length of all text between <ins> or <del>
       
   251 					$stripped_matches = strlen(strip_tags( join(' ', $diff_matches[0]) ));
       
   252 					// since we count lengith of text between <ins> or <del> (instead of picking just one),
       
   253 					//	we double the length of chars not in those tags.
       
   254 					$stripped_diff = strlen(strip_tags( $diff )) * 2 - $stripped_matches;
       
   255 					$diff_ratio = $stripped_matches / $stripped_diff;
       
   256 					if ( $diff_ratio > $this->_diff_threshold )
       
   257 						continue; // Too different. Don't save diffs.
       
   258 				}
       
   259 
       
   260 				// Un-inline the diffs by removing del or ins
       
   261 				$orig_diffs[$o]  = preg_replace( '|<ins>.*?</ins>|', '', $diff );
       
   262 				$final_diffs[$f] = preg_replace( '|<del>.*?</del>|', '', $diff );
       
   263 			}
       
   264 		}
       
   265 
       
   266 		foreach ( array_keys($orig_rows) as $row ) {
       
   267 			// Both columns have blanks. Ignore them.
       
   268 			if ( $orig_rows[$row] < 0 && $final_rows[$row] < 0 )
       
   269 				continue;
       
   270 
       
   271 			// If we have a word based diff, use it. Otherwise, use the normal line.
       
   272 			if ( isset( $orig_diffs[$orig_rows[$row]] ) )
       
   273 				$orig_line = $orig_diffs[$orig_rows[$row]];
       
   274 			elseif ( isset( $orig[$orig_rows[$row]] ) )
       
   275 				$orig_line = htmlspecialchars($orig[$orig_rows[$row]]);
       
   276 			else
       
   277 				$orig_line = '';
       
   278 
       
   279 			if ( isset( $final_diffs[$final_rows[$row]] ) )
       
   280 				$final_line = $final_diffs[$final_rows[$row]];
       
   281 			elseif ( isset( $final[$final_rows[$row]] ) )
       
   282 				$final_line = htmlspecialchars($final[$final_rows[$row]]);
       
   283 			else
       
   284 				$final_line = '';
       
   285 
       
   286 			if ( $orig_rows[$row] < 0 ) { // Orig is blank. This is really an added row.
       
   287 				$r .= $this->_added( array($final_line), false );
       
   288 			} elseif ( $final_rows[$row] < 0 ) { // Final is blank. This is really a deleted row.
       
   289 				$r .= $this->_deleted( array($orig_line), false );
       
   290 			} else { // A true changed row.
       
   291 				if ( $this->_show_split_view ) {
       
   292 					$r .= '<tr>' . $this->deletedLine( $orig_line ) . $this->emptyLine() . $this->addedLine( $final_line ) . "</tr>\n";
       
   293 				} else {
       
   294 					$r .= '<tr>' . $this->deletedLine( $orig_line ) . "</tr><tr>" . $this->addedLine( $final_line ) . "</tr>\n";
       
   295 				}
       
   296 			}
       
   297 		}
       
   298 
       
   299 		return $r;
       
   300 	}
       
   301 
       
   302 	/**
       
   303 	 * Takes changed blocks and matches which rows in orig turned into which rows in final.
       
   304 	 *
       
   305 	 * Returns
       
   306 	 *	*_matches ( which rows match with which )
       
   307 	 *	*_rows ( order of rows in each column interleaved with blank rows as
       
   308 	 *		necessary )
       
   309 	 *
       
   310 	 * @since 2.6.0
       
   311 	 *
       
   312 	 * @param array $orig
       
   313 	 * @param array $final
       
   314 	 * @return array
       
   315 	 */
       
   316 	function interleave_changed_lines( $orig, $final ) {
       
   317 
       
   318 		// Contains all pairwise string comparisons. Keys are such that this need only be a one dimensional array.
       
   319 		$matches = array();
       
   320 		foreach ( array_keys($orig) as $o ) {
       
   321 			foreach ( array_keys($final) as $f ) {
       
   322 				$matches["$o,$f"] = $this->compute_string_distance( $orig[$o], $final[$f] );
       
   323 			}
       
   324 		}
       
   325 		asort($matches); // Order by string distance.
       
   326 
       
   327 		$orig_matches  = array();
       
   328 		$final_matches = array();
       
   329 
       
   330 		foreach ( $matches as $keys => $difference ) {
       
   331 			list($o, $f) = explode(',', $keys);
       
   332 			$o = (int) $o;
       
   333 			$f = (int) $f;
       
   334 
       
   335 			// Already have better matches for these guys
       
   336 			if ( isset($orig_matches[$o]) && isset($final_matches[$f]) )
       
   337 				continue;
       
   338 
       
   339 			// First match for these guys. Must be best match
       
   340 			if ( !isset($orig_matches[$o]) && !isset($final_matches[$f]) ) {
       
   341 				$orig_matches[$o] = $f;
       
   342 				$final_matches[$f] = $o;
       
   343 				continue;
       
   344 			}
       
   345 
       
   346 			// Best match of this final is already taken?  Must mean this final is a new row.
       
   347 			if ( isset($orig_matches[$o]) )
       
   348 				$final_matches[$f] = 'x';
       
   349 
       
   350 			// Best match of this orig is already taken?  Must mean this orig is a deleted row.
       
   351 			elseif ( isset($final_matches[$f]) )
       
   352 				$orig_matches[$o] = 'x';
       
   353 		}
       
   354 
       
   355 		// We read the text in this order
       
   356 		ksort($orig_matches);
       
   357 		ksort($final_matches);
       
   358 
       
   359 		// Stores rows and blanks for each column.
       
   360 		$orig_rows = $orig_rows_copy = array_keys($orig_matches);
       
   361 		$final_rows = array_keys($final_matches);
       
   362 
       
   363 		// Interleaves rows with blanks to keep matches aligned.
       
   364 		// We may end up with some extraneous blank rows, but we'll just ignore them later.
       
   365 		foreach ( $orig_rows_copy as $orig_row ) {
       
   366 			$final_pos = array_search($orig_matches[$orig_row], $final_rows, true);
       
   367 			$orig_pos = (int) array_search($orig_row, $orig_rows, true);
       
   368 
       
   369 			if ( false === $final_pos ) { // This orig is paired with a blank final.
       
   370 				array_splice( $final_rows, $orig_pos, 0, -1 );
       
   371 			} elseif ( $final_pos < $orig_pos ) { // This orig's match is up a ways. Pad final with blank rows.
       
   372 				$diff_pos = $final_pos - $orig_pos;
       
   373 				while ( $diff_pos < 0 )
       
   374 					array_splice( $final_rows, $orig_pos, 0, $diff_pos++ );
       
   375 			} elseif ( $final_pos > $orig_pos ) { // This orig's match is down a ways. Pad orig with blank rows.
       
   376 				$diff_pos = $orig_pos - $final_pos;
       
   377 				while ( $diff_pos < 0 )
       
   378 					array_splice( $orig_rows, $orig_pos, 0, $diff_pos++ );
       
   379 			}
       
   380 		}
       
   381 
       
   382 		// Pad the ends with blank rows if the columns aren't the same length
       
   383 		$diff_count = count($orig_rows) - count($final_rows);
       
   384 		if ( $diff_count < 0 ) {
       
   385 			while ( $diff_count < 0 )
       
   386 				array_push($orig_rows, $diff_count++);
       
   387 		} elseif ( $diff_count > 0 ) {
       
   388 			$diff_count = -1 * $diff_count;
       
   389 			while ( $diff_count < 0 )
       
   390 				array_push($final_rows, $diff_count++);
       
   391 		}
       
   392 
       
   393 		return array($orig_matches, $final_matches, $orig_rows, $final_rows);
       
   394 
       
   395 /*
       
   396 		// Debug
       
   397 		echo "\n\n\n\n\n";
       
   398 
       
   399 		echo "-- DEBUG Matches: Orig -> Final --";
       
   400 
       
   401 		foreach ( $orig_matches as $o => $f ) {
       
   402 			echo "\n\n\n\n\n";
       
   403 			echo "ORIG: $o, FINAL: $f\n";
       
   404 			var_dump($orig[$o],$final[$f]);
       
   405 		}
       
   406 		echo "\n\n\n\n\n";
       
   407 
       
   408 		echo "-- DEBUG Matches: Final -> Orig --";
       
   409 
       
   410 		foreach ( $final_matches as $f => $o ) {
       
   411 			echo "\n\n\n\n\n";
       
   412 			echo "FINAL: $f, ORIG: $o\n";
       
   413 			var_dump($final[$f],$orig[$o]);
       
   414 		}
       
   415 		echo "\n\n\n\n\n";
       
   416 
       
   417 		echo "-- DEBUG Rows: Orig -- Final --";
       
   418 
       
   419 		echo "\n\n\n\n\n";
       
   420 		foreach ( $orig_rows as $row => $o ) {
       
   421 			if ( $o < 0 )
       
   422 				$o = 'X';
       
   423 			$f = $final_rows[$row];
       
   424 			if ( $f < 0 )
       
   425 				$f = 'X';
       
   426 			echo "$o -- $f\n";
       
   427 		}
       
   428 		echo "\n\n\n\n\n";
       
   429 
       
   430 		echo "-- END DEBUG --";
       
   431 
       
   432 		echo "\n\n\n\n\n";
       
   433 
       
   434 		return array($orig_matches, $final_matches, $orig_rows, $final_rows);
       
   435 */
       
   436 	}
       
   437 
       
   438 	/**
       
   439 	 * Computes a number that is intended to reflect the "distance" between two strings.
       
   440 	 *
       
   441 	 * @since 2.6.0
       
   442 	 *
       
   443 	 * @param string $string1
       
   444 	 * @param string $string2
       
   445 	 * @return int|float
       
   446 	 */
       
   447 	function compute_string_distance( $string1, $string2 ) {
       
   448 		// Vectors containing character frequency for all chars in each string
       
   449 		$chars1 = count_chars($string1);
       
   450 		$chars2 = count_chars($string2);
       
   451 
       
   452 		// L1-norm of difference vector.
       
   453 		$difference = array_sum( array_map( array($this, 'difference'), $chars1, $chars2 ) );
       
   454 
       
   455 		// $string1 has zero length? Odd. Give huge penalty by not dividing.
       
   456 		if ( !$string1 )
       
   457 			return $difference;
       
   458 
       
   459 		// Return distance per charcter (of string1)
       
   460 		return $difference / strlen($string1);
       
   461 	}
       
   462 
       
   463 	/**
       
   464 	 * @ignore
       
   465 	 * @since 2.6.0
       
   466 	 *
       
   467 	 * @param int $a
       
   468 	 * @param int $b
       
   469 	 * @return int
       
   470 	 */
       
   471 	function difference( $a, $b ) {
       
   472 		return abs( $a - $b );
       
   473 	}
       
   474 
       
   475 }
       
   476 
       
   477 /**
       
   478  * Better word splitting than the PEAR package provides.
       
   479  *
       
   480  * @since 2.6.0
       
   481  * @uses Text_Diff_Renderer_inline Extends
       
   482  */
       
   483 class WP_Text_Diff_Renderer_inline extends Text_Diff_Renderer_inline {
       
   484 
       
   485 	/**
       
   486 	 * @ignore
       
   487 	 * @since 2.6.0
       
   488 	 *
       
   489 	 * @param string $string
       
   490 	 * @param string $newlineEscape
       
   491 	 * @return string
       
   492 	 */
       
   493 	function _splitOnWords($string, $newlineEscape = "\n") {
       
   494 		$string = str_replace("\0", '', $string);
       
   495 		$words  = preg_split( '/([^\w])/u', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
       
   496 		$words  = str_replace( "\n", $newlineEscape, $words );
       
   497 		return $words;
       
   498 	}
       
   499 
       
   500 }