246 $orig_diffs = array(); |
262 $orig_diffs = array(); |
247 $final_diffs = array(); |
263 $final_diffs = array(); |
248 |
264 |
249 // Compute word diffs for each matched pair using the inline diff |
265 // Compute word diffs for each matched pair using the inline diff |
250 foreach ( $orig_matches as $o => $f ) { |
266 foreach ( $orig_matches as $o => $f ) { |
251 if ( is_numeric($o) && is_numeric($f) ) { |
267 if ( is_numeric( $o ) && is_numeric( $f ) ) { |
252 $text_diff = new Text_Diff( 'auto', array( array($orig[$o]), array($final[$f]) ) ); |
268 $text_diff = new Text_Diff( 'auto', array( array( $orig[ $o ] ), array( $final[ $f ] ) ) ); |
253 $renderer = new $this->inline_diff_renderer; |
269 $renderer = new $this->inline_diff_renderer; |
254 $diff = $renderer->render( $text_diff ); |
270 $diff = $renderer->render( $text_diff ); |
255 |
271 |
256 // If they're too different, don't include any <ins> or <dels> |
272 // If they're too different, don't include any <ins> or <dels> |
257 if ( preg_match_all( '!(<ins>.*?</ins>|<del>.*?</del>)!', $diff, $diff_matches ) ) { |
273 if ( preg_match_all( '!(<ins>.*?</ins>|<del>.*?</del>)!', $diff, $diff_matches ) ) { |
258 // length of all text between <ins> or <del> |
274 // length of all text between <ins> or <del> |
259 $stripped_matches = strlen(strip_tags( join(' ', $diff_matches[0]) )); |
275 $stripped_matches = strlen( strip_tags( join( ' ', $diff_matches[0] ) ) ); |
260 // since we count length of text between <ins> or <del> (instead of picking just one), |
276 // since we count length of text between <ins> or <del> (instead of picking just one), |
261 // we double the length of chars not in those tags. |
277 // we double the length of chars not in those tags. |
262 $stripped_diff = strlen(strip_tags( $diff )) * 2 - $stripped_matches; |
278 $stripped_diff = strlen( strip_tags( $diff ) ) * 2 - $stripped_matches; |
263 $diff_ratio = $stripped_matches / $stripped_diff; |
279 $diff_ratio = $stripped_matches / $stripped_diff; |
264 if ( $diff_ratio > $this->_diff_threshold ) |
280 if ( $diff_ratio > $this->_diff_threshold ) { |
265 continue; // Too different. Don't save diffs. |
281 continue; // Too different. Don't save diffs. |
|
282 } |
266 } |
283 } |
267 |
284 |
268 // Un-inline the diffs by removing del or ins |
285 // Un-inline the diffs by removing del or ins |
269 $orig_diffs[$o] = preg_replace( '|<ins>.*?</ins>|', '', $diff ); |
286 $orig_diffs[ $o ] = preg_replace( '|<ins>.*?</ins>|', '', $diff ); |
270 $final_diffs[$f] = preg_replace( '|<del>.*?</del>|', '', $diff ); |
287 $final_diffs[ $f ] = preg_replace( '|<del>.*?</del>|', '', $diff ); |
271 } |
288 } |
272 } |
289 } |
273 |
290 |
274 foreach ( array_keys($orig_rows) as $row ) { |
291 foreach ( array_keys( $orig_rows ) as $row ) { |
275 // Both columns have blanks. Ignore them. |
292 // Both columns have blanks. Ignore them. |
276 if ( $orig_rows[$row] < 0 && $final_rows[$row] < 0 ) |
293 if ( $orig_rows[ $row ] < 0 && $final_rows[ $row ] < 0 ) { |
277 continue; |
294 continue; |
|
295 } |
278 |
296 |
279 // If we have a word based diff, use it. Otherwise, use the normal line. |
297 // If we have a word based diff, use it. Otherwise, use the normal line. |
280 if ( isset( $orig_diffs[$orig_rows[$row]] ) ) |
298 if ( isset( $orig_diffs[ $orig_rows[ $row ] ] ) ) { |
281 $orig_line = $orig_diffs[$orig_rows[$row]]; |
299 $orig_line = $orig_diffs[ $orig_rows[ $row ] ]; |
282 elseif ( isset( $orig[$orig_rows[$row]] ) ) |
300 } elseif ( isset( $orig[ $orig_rows[ $row ] ] ) ) { |
283 $orig_line = htmlspecialchars($orig[$orig_rows[$row]]); |
301 $orig_line = htmlspecialchars( $orig[ $orig_rows[ $row ] ] ); |
284 else |
302 } else { |
285 $orig_line = ''; |
303 $orig_line = ''; |
286 |
304 } |
287 if ( isset( $final_diffs[$final_rows[$row]] ) ) |
305 |
288 $final_line = $final_diffs[$final_rows[$row]]; |
306 if ( isset( $final_diffs[ $final_rows[ $row ] ] ) ) { |
289 elseif ( isset( $final[$final_rows[$row]] ) ) |
307 $final_line = $final_diffs[ $final_rows[ $row ] ]; |
290 $final_line = htmlspecialchars($final[$final_rows[$row]]); |
308 } elseif ( isset( $final[ $final_rows[ $row ] ] ) ) { |
291 else |
309 $final_line = htmlspecialchars( $final[ $final_rows[ $row ] ] ); |
|
310 } else { |
292 $final_line = ''; |
311 $final_line = ''; |
293 |
312 } |
294 if ( $orig_rows[$row] < 0 ) { // Orig is blank. This is really an added row. |
313 |
295 $r .= $this->_added( array($final_line), false ); |
314 if ( $orig_rows[ $row ] < 0 ) { // Orig is blank. This is really an added row. |
296 } elseif ( $final_rows[$row] < 0 ) { // Final is blank. This is really a deleted row. |
315 $r .= $this->_added( array( $final_line ), false ); |
297 $r .= $this->_deleted( array($orig_line), false ); |
316 } elseif ( $final_rows[ $row ] < 0 ) { // Final is blank. This is really a deleted row. |
|
317 $r .= $this->_deleted( array( $orig_line ), false ); |
298 } else { // A true changed row. |
318 } else { // A true changed row. |
299 if ( $this->_show_split_view ) { |
319 if ( $this->_show_split_view ) { |
300 $r .= '<tr>' . $this->deletedLine( $orig_line ) . $this->emptyLine() . $this->addedLine( $final_line ) . "</tr>\n"; |
320 $r .= '<tr>' . $this->deletedLine( $orig_line ) . $this->emptyLine() . $this->addedLine( $final_line ) . "</tr>\n"; |
301 } else { |
321 } else { |
302 $r .= '<tr>' . $this->deletedLine( $orig_line ) . "</tr><tr>" . $this->addedLine( $final_line ) . "</tr>\n"; |
322 $r .= '<tr>' . $this->deletedLine( $orig_line ) . '</tr><tr>' . $this->addedLine( $final_line ) . "</tr>\n"; |
303 } |
323 } |
304 } |
324 } |
305 } |
325 } |
306 |
326 |
307 return $r; |
327 return $r; |
337 */ |
357 */ |
public function interleave_changed_lines( $orig, $final ) {
	// Pairs up the lines of a changed block and lays both columns out so that
	// paired lines share a row index. Returns, in order: the orig => final
	// pairing, the final => orig pairing, the orig column and the final column.
	// In the two columns a negative entry stands for a blank row.

	// Score every orig/final combination. The flat "o,f" key keeps the table
	// one-dimensional so it can be sorted in a single pass.
	$distances = array();
	foreach ( $orig as $o => $orig_line ) {
		foreach ( $final as $f => $final_line ) {
			$distances[ "$o,$f" ] = $this->compute_string_distance( $orig_line, $final_line );
		}
	}
	asort( $distances ); // Closest pairs first.

	$orig_matches  = array();
	$final_matches = array();

	foreach ( array_keys( $distances ) as $pair ) {
		$indexes = explode( ',', $pair );
		$o       = (int) $indexes[0];
		$f       = (int) $indexes[1];

		$orig_taken  = isset( $orig_matches[ $o ] );
		$final_taken = isset( $final_matches[ $f ] );

		// Both sides were settled by an earlier (closer) pair.
		if ( $orig_taken && $final_taken ) {
			continue;
		}

		// Neither side has been seen yet, so this is the best pairing for both.
		if ( ! $orig_taken && ! $final_taken ) {
			$orig_matches[ $o ]  = $f;
			$final_matches[ $f ] = $o;
			continue;
		}

		if ( $orig_taken ) {
			// The closest orig for this final is already paired: treat the final as a new row.
			$final_matches[ $f ] = 'x';
		} elseif ( $final_taken ) {
			// The closest final for this orig is already paired: treat the orig as a deleted row.
			$orig_matches[ $o ] = 'x';
		}
	}

	// Restore reading order.
	ksort( $orig_matches );
	ksort( $final_matches );

	// Column layouts. $orig_rows is modified inside the loop below, so the
	// loop walks an untouched snapshot of it.
	$orig_rows          = array_keys( $orig_matches );
	$orig_rows_snapshot = $orig_rows;
	$final_rows         = array_keys( $final_matches );

	// Insert blanks so that paired rows end up at the same position in both columns.
	// This can leave rows that are blank on both sides; the caller is expected to skip those.
	foreach ( $orig_rows_snapshot as $orig_row ) {
		$final_pos = array_search( $orig_matches[ $orig_row ], $final_rows, true );
		$orig_pos  = (int) array_search( $orig_row, $orig_rows, true );

		if ( false === $final_pos ) {
			// No final counterpart: put a single blank opposite this orig row.
			array_splice( $final_rows, $orig_pos, 0, -1 );
			continue;
		}

		$offset = $final_pos - $orig_pos;

		if ( $offset < 0 ) {
			// The match sits higher in the final column: push it down with blanks.
			array_splice( $final_rows, $orig_pos, 0, range( -1, $offset ) );
		} elseif ( $offset > 0 ) {
			// The match sits lower in the final column: push this orig row down with blanks.
			array_splice( $orig_rows, $orig_pos, 0, range( -1, -$offset ) );
		}
	}

	// Even out the column lengths by appending blanks to the shorter one.
	$surplus = count( $orig_rows ) - count( $final_rows );
	if ( $surplus < 0 ) {
		for ( $blank = $surplus; $blank < 0; $blank++ ) {
			$orig_rows[] = $blank;
		}
	} elseif ( $surplus > 0 ) {
		for ( $blank = -1 * $surplus; $blank < 0; $blank++ ) {
			$final_rows[] = $blank;
		}
	}

	return array( $orig_matches, $final_matches, $orig_rows, $final_rows );
}
417 |
438 |
418 /** |
439 /** |
419 * Computes a number that is intended to reflect the "distance" between two strings. |
440 * Computes a number that is intended to reflect the "distance" between two strings. |
420 * |
441 * |
423 * @param string $string1 |
444 * @param string $string1 |
424 * @param string $string2 |
445 * @param string $string2 |
425 * @return int|float |
446 * @return int|float |
426 */ |
447 */ |
public function compute_string_distance( $string1, $string2 ) {
	// Character-frequency vectors are memoized per string. The md5 of the
	// string serves as a compact cache key.
	$frequencies = array();
	foreach ( array( $string1, $string2 ) as $string ) {
		$count_key = md5( $string );

		if ( ! isset( $this->count_cache[ $count_key ] ) ) {
			$this->count_cache[ $count_key ] = count_chars( $string );
		}

		$frequencies[] = $this->count_cache[ $count_key ];
	}

	$chars1 = $frequencies[0];
	$chars2 = $frequencies[1];

	// The summed per-character difference is memoized as well, keyed on the
	// two frequency vectors rather than on the strings themselves.
	$difference_key = md5( implode( ',', $chars1 ) . ':' . implode( ',', $chars2 ) );
	if ( ! isset( $this->difference_cache[ $difference_key ] ) ) {
		$this->difference_cache[ $difference_key ] = array_sum(
			array_map( array( $this, 'difference' ), $chars1, $chars2 )
		);
	}

	$distance = $this->difference_cache[ $difference_key ];

	// Normalize by the length of $string1. An empty $string1 is left
	// undivided, which acts as a heavy penalty and avoids dividing by zero.
	// The string '0' is also falsy and takes the undivided path, but its
	// length is 1, so the value is the same either way.
	return $string1 ? $distance / strlen( $string1 ) : $distance;
}
442 |
480 |
443 /** |
481 /** |
444 * @ignore |
482 * @ignore |
445 * @since 2.6.0 |
483 * @since 2.6.0 |