web/wp-includes/wp-diff.php
branchwordpress
changeset 109 03b0d1493584
child 132 4d4862461b8d
equal deleted inserted replaced
-1:000000000000 109:03b0d1493584
       
     1 <?php
       
     2 /**
       
     3  * WordPress Diff bastard child of old MediaWiki Diff Formatter.
       
     4  *
       
     5  * Basically all that remains is the table structure and some method names.
       
     6  *
       
     7  * @package WordPress
       
     8  * @subpackage Diff
       
     9  */
       
    10 
       
    11 if ( !class_exists( 'Text_Diff' ) ) {
       
    12 	/** Text_Diff class */
       
    13 	require( dirname(__FILE__).'/Text/Diff.php' );
       
    14 	/** Text_Diff_Renderer class */
       
    15 	require( dirname(__FILE__).'/Text/Diff/Renderer.php' );
       
    16 	/** Text_Diff_Renderer_inline class */
       
    17 	require( dirname(__FILE__).'/Text/Diff/Renderer/inline.php' );
       
    18 }
       
    19 
       
    20 /**
       
    21  * Table renderer to display the diff lines.
       
    22  *
       
    23  * @since 2.6.0
       
    24  * @uses Text_Diff_Renderer Extends
       
    25  */
       
    26 class WP_Text_Diff_Renderer_Table extends Text_Diff_Renderer {
       
    27 
       
    28 	/**
       
    29 	 * @see Text_Diff_Renderer::_leading_context_lines
       
    30 	 * @var int
       
    31 	 * @access protected
       
    32 	 * @since 2.6.0
       
    33 	 */
       
    34 	var $_leading_context_lines  = 10000;
       
    35 
       
    36 	/**
       
    37 	 * @see Text_Diff_Renderer::_trailing_context_lines
       
    38 	 * @var int
       
    39 	 * @access protected
       
    40 	 * @since 2.6.0
       
    41 	 */
       
    42 	var $_trailing_context_lines = 10000;
       
    43 
       
    44 	/**
       
     45 	 * Ratio of changed text above which a matched pair of lines is shown without word-level highlighting.
       
    46 	 *
       
    47 	 * @var float
       
    48 	 * @access protected
       
    49 	 * @since 2.6.0
       
    50 	 */
       
    51 	var $_diff_threshold = 0.6;
       
    52 
       
    53 	/**
       
    54 	 * Inline display helper object name.
       
    55 	 *
       
    56 	 * @var string
       
    57 	 * @access protected
       
    58 	 * @since 2.6.0
       
    59 	 */
       
    60 	var $inline_diff_renderer = 'WP_Text_Diff_Renderer_inline';
       
    61 
       
    62 	/**
       
    63 	 * PHP4 Constructor - Call parent constructor with params array.
       
    64 	 *
       
    65 	 * This will set class properties based on the key value pairs in the array.
       
    66 	 *
       
    67 	 * @since unknown
       
    68 	 *
       
    69 	 * @param array $params
       
    70 	 */
       
    71 	function Text_Diff_Renderer_Table( $params = array() ) {
       
    72 		$parent = get_parent_class($this);
       
    73 		$this->$parent( $params );
       
    74 	}
       
    75 
       
    76 	/**
       
    77 	 * @ignore
       
    78 	 *
       
    79 	 * @param string $header
       
    80 	 * @return string
       
    81 	 */
       
    82 	function _startBlock( $header ) {
       
    83 		return '';
       
    84 	}
       
    85 
       
    86 	/**
       
    87 	 * @ignore
       
    88 	 *
       
    89 	 * @param array $lines
       
    90 	 * @param string $prefix
       
    91 	 */
       
    92 	function _lines( $lines, $prefix=' ' ) {
       
    93 	}
       
    94 
       
    95 	/**
       
    96 	 * @ignore
       
    97 	 *
       
    98 	 * @param string $line HTML-escape the value.
       
    99 	 * @return string
       
   100 	 */
       
   101 	function addedLine( $line ) {
       
   102 		return "<td>+</td><td class='diff-addedline'>{$line}</td>";
       
   103 	}
       
   104 
       
   105 	/**
       
   106 	 * @ignore
       
   107 	 *
       
   108 	 * @param string $line HTML-escape the value.
       
   109 	 * @return string
       
   110 	 */
       
   111 	function deletedLine( $line ) {
       
   112 		return "<td>-</td><td class='diff-deletedline'>{$line}</td>";
       
   113 	}
       
   114 
       
   115 	/**
       
   116 	 * @ignore
       
   117 	 *
       
   118 	 * @param string $line HTML-escape the value.
       
   119 	 * @return string
       
   120 	 */
       
   121 	function contextLine( $line ) {
       
   122 		return "<td> </td><td class='diff-context'>{$line}</td>";
       
   123 	}
       
   124 
       
   125 	/**
       
   126 	 * @ignore
       
   127 	 *
       
   128 	 * @return string
       
   129 	 */
       
   130 	function emptyLine() {
       
   131 		return '<td colspan="2">&nbsp;</td>';
       
   132 	}
       
   133 
       
   134 	/**
       
   135 	 * @ignore
       
   136 	 * @access private
       
   137 	 *
       
   138 	 * @param array $lines
       
   139 	 * @param bool $encode
       
   140 	 * @return string
       
   141 	 */
       
   142 	function _added( $lines, $encode = true ) {
       
   143 		$r = '';
       
   144 		foreach ($lines as $line) {
       
   145 			if ( $encode )
       
   146 				$line = htmlspecialchars( $line );
       
   147 			$r .= '<tr>' . $this->emptyLine() . $this->addedLine( $line ) . "</tr>\n";
       
   148 		}
       
   149 		return $r;
       
   150 	}
       
   151 
       
   152 	/**
       
   153 	 * @ignore
       
   154 	 * @access private
       
   155 	 *
       
   156 	 * @param array $lines
       
   157 	 * @param bool $encode
       
   158 	 * @return string
       
   159 	 */
       
   160 	function _deleted( $lines, $encode = true ) {
       
   161 		$r = '';
       
   162 		foreach ($lines as $line) {
       
   163 			if ( $encode )
       
   164 				$line = htmlspecialchars( $line );
       
   165 			$r .= '<tr>' . $this->deletedLine( $line ) . $this->emptyLine() . "</tr>\n";
       
   166 		}
       
   167 		return $r;
       
   168 	}
       
   169 
       
   170 	/**
       
   171 	 * @ignore
       
   172 	 * @access private
       
   173 	 *
       
   174 	 * @param array $lines
       
   175 	 * @param bool $encode
       
   176 	 * @return string
       
   177 	 */
       
   178 	function _context( $lines, $encode = true ) {
       
   179 		$r = '';
       
   180 		foreach ($lines as $line) {
       
   181 			if ( $encode )
       
   182 				$line = htmlspecialchars( $line );
       
   183 			$r .= '<tr>' .
       
   184 				$this->contextLine( $line ) . $this->contextLine( $line ) . "</tr>\n";
       
   185 		}
       
   186 		return $r;
       
   187 	}
       
   188 
       
   189 	/**
       
   190 	 * Process changed lines to do word-by-word diffs for extra highlighting.
       
   191 	 *
       
   192 	 * (TRAC style) sometimes these lines can actually be deleted or added rows.
       
   193 	 * We do additional processing to figure that out
       
   194 	 *
       
   195 	 * @access private
       
   196 	 * @since 2.6.0
       
   197 	 *
       
   198 	 * @param array $orig
       
   199 	 * @param array $final
       
   200 	 * @return string
       
   201 	 */
       
   202 	function _changed( $orig, $final ) {
       
   203 		$r = '';
       
   204 
       
   205 		// Does the aforementioned additional processing
       
   206 		// *_matches tell what rows are "the same" in orig and final.  Those pairs will be diffed to get word changes
       
   207 		//	match is numeric: an index in other column
       
   208 		//	match is 'X': no match.  It is a new row
       
   209 		// *_rows are column vectors for the orig column and the final column.
       
   210 		//	row >= 0: an indix of the $orig or $final array
       
   211 		//	row  < 0: a blank row for that column
       
   212 		list($orig_matches, $final_matches, $orig_rows, $final_rows) = $this->interleave_changed_lines( $orig, $final );
       
   213 
       
   214 
       
   215 		// These will hold the word changes as determined by an inline diff
       
   216 		$orig_diffs  = array();
       
   217 		$final_diffs = array();
       
   218 
       
   219 		// Compute word diffs for each matched pair using the inline diff
       
   220 		foreach ( $orig_matches as $o => $f ) {
       
   221 			if ( is_numeric($o) && is_numeric($f) ) {
       
   222 				$text_diff = new Text_Diff( 'auto', array( array($orig[$o]), array($final[$f]) ) );
       
   223 				$renderer = new $this->inline_diff_renderer;
       
   224 				$diff = $renderer->render( $text_diff );
       
   225 
       
   226 				// If they're too different, don't include any <ins> or <dels>
       
   227 				if ( $diff_count = preg_match_all( '!(<ins>.*?</ins>|<del>.*?</del>)!', $diff, $diff_matches ) ) {
       
   228 					// length of all text between <ins> or <del>
       
   229 					$stripped_matches = strlen(strip_tags( join(' ', $diff_matches[0]) ));
       
   230 					// since we count lengith of text between <ins> or <del> (instead of picking just one),
       
   231 					//	we double the length of chars not in those tags.
       
   232 					$stripped_diff = strlen(strip_tags( $diff )) * 2 - $stripped_matches;
       
   233 					$diff_ratio = $stripped_matches / $stripped_diff;
       
   234 					if ( $diff_ratio > $this->_diff_threshold )
       
   235 						continue; // Too different.  Don't save diffs.
       
   236 				}
       
   237 
       
   238 				// Un-inline the diffs by removing del or ins
       
   239 				$orig_diffs[$o]  = preg_replace( '|<ins>.*?</ins>|', '', $diff );
       
   240 				$final_diffs[$f] = preg_replace( '|<del>.*?</del>|', '', $diff );
       
   241 			}
       
   242 		}
       
   243 
       
   244 		foreach ( array_keys($orig_rows) as $row ) {
       
   245 			// Both columns have blanks.  Ignore them.
       
   246 			if ( $orig_rows[$row] < 0 && $final_rows[$row] < 0 )
       
   247 				continue;
       
   248 
       
   249 			// If we have a word based diff, use it.  Otherwise, use the normal line.
       
   250 			$orig_line  = isset($orig_diffs[$orig_rows[$row]])
       
   251 				? $orig_diffs[$orig_rows[$row]]
       
   252 				: htmlspecialchars($orig[$orig_rows[$row]]);
       
   253 			$final_line = isset($final_diffs[$final_rows[$row]])
       
   254 				? $final_diffs[$final_rows[$row]]
       
   255 				: htmlspecialchars($final[$final_rows[$row]]);
       
   256 
       
   257 			if ( $orig_rows[$row] < 0 ) { // Orig is blank.  This is really an added row.
       
   258 				$r .= $this->_added( array($final_line), false );
       
   259 			} elseif ( $final_rows[$row] < 0 ) { // Final is blank.  This is really a deleted row.
       
   260 				$r .= $this->_deleted( array($orig_line), false );
       
   261 			} else { // A true changed row.
       
   262 				$r .= '<tr>' . $this->deletedLine( $orig_line ) . $this->addedLine( $final_line ) . "</tr>\n";
       
   263 			}
       
   264 		}
       
   265 
       
   266 		return $r;
       
   267 	}
       
   268 
       
   269 	/**
       
   270 	 * Takes changed blocks and matches which rows in orig turned into which rows in final.
       
   271 	 *
       
   272 	 * Returns
       
   273 	 *	*_matches ( which rows match with which )
       
   274 	 *	*_rows ( order of rows in each column interleaved with blank rows as
       
   275 	 *		necessary )
       
   276 	 *
       
   277 	 * @since 2.6.0
       
   278 	 *
       
   279 	 * @param unknown_type $orig
       
   280 	 * @param unknown_type $final
       
   281 	 * @return unknown
       
   282 	 */
       
   283 	function interleave_changed_lines( $orig, $final ) {
       
   284 
       
   285 		// Contains all pairwise string comparisons.  Keys are such that this need only be a one dimensional array.
       
   286 		$matches = array();
       
   287 		foreach ( array_keys($orig) as $o ) {
       
   288 			foreach ( array_keys($final) as $f ) {
       
   289 				$matches["$o,$f"] = $this->compute_string_distance( $orig[$o], $final[$f] );
       
   290 			}
       
   291 		}
       
   292 		asort($matches); // Order by string distance.
       
   293 
       
   294 		$orig_matches  = array();
       
   295 		$final_matches = array();
       
   296 
       
   297 		foreach ( $matches as $keys => $difference ) {
       
   298 			list($o, $f) = explode(',', $keys);
       
   299 			$o = (int) $o;
       
   300 			$f = (int) $f;
       
   301 
       
   302 			// Already have better matches for these guys
       
   303 			if ( isset($orig_matches[$o]) && isset($final_matches[$f]) )
       
   304 				continue;
       
   305 
       
   306 			// First match for these guys.  Must be best match
       
   307 			if ( !isset($orig_matches[$o]) && !isset($final_matches[$f]) ) {
       
   308 				$orig_matches[$o] = $f;
       
   309 				$final_matches[$f] = $o;
       
   310 				continue;
       
   311 			}
       
   312 
       
   313 			// Best match of this final is already taken?  Must mean this final is a new row.
       
   314 			if ( isset($orig_matches[$o]) )
       
   315 				$final_matches[$f] = 'x';
       
   316 
       
   317 			// Best match of this orig is already taken?  Must mean this orig is a deleted row.
       
   318 			elseif ( isset($final_matches[$f]) )
       
   319 				$orig_matches[$o] = 'x';
       
   320 		}
       
   321 
       
   322 		// We read the text in this order
       
   323 		ksort($orig_matches);
       
   324 		ksort($final_matches);
       
   325 
       
   326 
       
   327 		// Stores rows and blanks for each column.
       
   328 		$orig_rows = $orig_rows_copy = array_keys($orig_matches);
       
   329 		$final_rows = array_keys($final_matches);
       
   330 
       
   331 		// Interleaves rows with blanks to keep matches aligned.
       
   332 		// We may end up with some extraneous blank rows, but we'll just ignore them later.
       
   333 		foreach ( $orig_rows_copy as $orig_row ) {
       
   334 			$final_pos = array_search($orig_matches[$orig_row], $final_rows, true);
       
   335 			$orig_pos = (int) array_search($orig_row, $orig_rows, true);
       
   336 
       
   337 			if ( false === $final_pos ) { // This orig is paired with a blank final.
       
   338 				array_splice( $final_rows, $orig_pos, 0, -1 );
       
   339 			} elseif ( $final_pos < $orig_pos ) { // This orig's match is up a ways.  Pad final with blank rows.
       
   340 				$diff_pos = $final_pos - $orig_pos;
       
   341 				while ( $diff_pos < 0 )
       
   342 					array_splice( $final_rows, $orig_pos, 0, $diff_pos++ );
       
   343 			} elseif ( $final_pos > $orig_pos ) { // This orig's match is down a ways.  Pad orig with blank rows.
       
   344 				$diff_pos = $orig_pos - $final_pos;
       
   345 				while ( $diff_pos < 0 )
       
   346 					array_splice( $orig_rows, $orig_pos, 0, $diff_pos++ );
       
   347 			}
       
   348 		}
       
   349 
       
   350 
       
   351 		// Pad the ends with blank rows if the columns aren't the same length
       
   352 		$diff_count = count($orig_rows) - count($final_rows);
       
   353 		if ( $diff_count < 0 ) {
       
   354 			while ( $diff_count < 0 )
       
   355 				array_push($orig_rows, $diff_count++);
       
   356 		} elseif ( $diff_count > 0 ) {
       
   357 			$diff_count = -1 * $diff_count;
       
   358 			while ( $diff_count < 0 )
       
   359 				array_push($final_rows, $diff_count++);
       
   360 		}
       
   361 
       
   362 		return array($orig_matches, $final_matches, $orig_rows, $final_rows);
       
   363 
       
   364 /*
       
   365 		// Debug
       
   366 		echo "\n\n\n\n\n";
       
   367 
       
   368 		echo "-- DEBUG Matches: Orig -> Final --";
       
   369 
       
   370 		foreach ( $orig_matches as $o => $f ) {
       
   371 			echo "\n\n\n\n\n";
       
   372 			echo "ORIG: $o, FINAL: $f\n";
       
   373 			var_dump($orig[$o],$final[$f]);
       
   374 		}
       
   375 		echo "\n\n\n\n\n";
       
   376 
       
   377 		echo "-- DEBUG Matches: Final -> Orig --";
       
   378 
       
   379 		foreach ( $final_matches as $f => $o ) {
       
   380 			echo "\n\n\n\n\n";
       
   381 			echo "FINAL: $f, ORIG: $o\n";
       
   382 			var_dump($final[$f],$orig[$o]);
       
   383 		}
       
   384 		echo "\n\n\n\n\n";
       
   385 
       
   386 		echo "-- DEBUG Rows: Orig -- Final --";
       
   387 
       
   388 		echo "\n\n\n\n\n";
       
   389 		foreach ( $orig_rows as $row => $o ) {
       
   390 			if ( $o < 0 )
       
   391 				$o = 'X';
       
   392 			$f = $final_rows[$row];
       
   393 			if ( $f < 0 )
       
   394 				$f = 'X';
       
   395 			echo "$o -- $f\n";
       
   396 		}
       
   397 		echo "\n\n\n\n\n";
       
   398 
       
   399 		echo "-- END DEBUG --";
       
   400 
       
   401 		echo "\n\n\n\n\n";
       
   402 
       
   403 		return array($orig_matches, $final_matches, $orig_rows, $final_rows);
       
   404 */
       
   405 	}
       
   406 
       
   407 	/**
       
   408 	 * Computes a number that is intended to reflect the "distance" between two strings.
       
   409 	 *
       
   410 	 * @since 2.6.0
       
   411 	 *
       
   412 	 * @param string $string1
       
   413 	 * @param string $string2
       
   414 	 * @return int
       
   415 	 */
       
   416 	function compute_string_distance( $string1, $string2 ) {
       
   417 		// Vectors containing character frequency for all chars in each string
       
   418 		$chars1 = count_chars($string1);
       
   419 		$chars2 = count_chars($string2);
       
   420 
       
   421 		// L1-norm of difference vector.
       
   422 		$difference = array_sum( array_map( array(&$this, 'difference'), $chars1, $chars2 ) );
       
   423 
       
   424 		// $string1 has zero length? Odd.  Give huge penalty by not dividing.
       
   425 		if ( !$string1 )
       
   426 			return $difference;
       
   427 
       
   428 		// Return distance per charcter (of string1)
       
   429 		return $difference / strlen($string1);
       
   430 	}
       
   431 
       
   432 	/**
       
   433 	 * @ignore
       
   434 	 * @since 2.6.0
       
   435 	 *
       
   436 	 * @param int $a
       
   437 	 * @param int $b
       
   438 	 * @return int
       
   439 	 */
       
   440 	function difference( $a, $b ) {
       
   441 		return abs( $a - $b );
       
   442 	}
       
   443 
       
   444 }
       
   445 
       
   446 /**
       
   447  * Better word splitting than the PEAR package provides.
       
   448  *
       
   449  * @since 2.6.0
       
   450  * @uses Text_Diff_Renderer_inline Extends
       
   451  */
       
   452 class WP_Text_Diff_Renderer_inline extends Text_Diff_Renderer_inline {
       
   453 
       
   454 	/**
       
   455 	 * @ignore
       
   456 	 * @since 2.6.0
       
   457 	 *
       
   458 	 * @param string $string
       
   459 	 * @param string $newlineEscape
       
   460 	 * @return string
       
   461 	 */
       
   462 	function _splitOnWords($string, $newlineEscape = "\n") {
       
   463 		$string = str_replace("\0", '', $string);
       
   464 		$words  = preg_split( '/([^\w])/u', $string, -1, PREG_SPLIT_DELIM_CAPTURE );
       
   465 		$words  = str_replace( "\n", $newlineEscape, $words );
       
   466 		return $words;
       
   467 	}
       
   468 
       
   469 }
       
   470 
       
   471 ?>