src/cm/ext/diff.py
changeset 155 f436e2fef076
parent 154 8cb89c153140
parent 153 e4746a10edf5
child 156 6d447220fd1e
equal deleted inserted replaced
154:8cb89c153140 155:f436e2fef076
     1 # -*- coding: utf-8 -*-
       
     2 #
       
     3 # Copyright (C) 2004-2009 Edgewall Software
       
     4 # Copyright (C) 2004-2006 Christopher Lenz <cmlenz@gmx.de>
       
     5 # All rights reserved.
       
     6 #
       
     7 # This software is licensed as described in the file COPYING, which
       
     8 # you should have received as part of this distribution. The terms
       
     9 # are also available at http://trac.edgewall.org/wiki/TracLicense.
       
    10 #
       
    11 # This software consists of voluntary contributions made by many
       
    12 # individuals. For the exact contribution history, see the revision
       
    13 # history and logs, available at http://trac.edgewall.org/log/.
       
    14 #
       
    15 # Author: Christopher Lenz <cmlenz@gmx.de>
       
    16 
       
    17 from trac.util.html import escape, Markup
       
    18 from trac.util.text import expandtabs
       
    19 
       
    20 from difflib import SequenceMatcher
       
    21 import re
       
    22 
       
    23 __all__ = ['get_diff_options', 'hdf_diff', 'diff_blocks', 'unified_diff']
       
    24 
       
    25 
       
    26 def _get_change_extent(str1, str2):
       
    27     """
       
    28     Determines the extent of differences between two strings. Returns a tuple
       
    29     containing the offset at which the changes start, and the negative offset
       
    30     at which the changes end. If the two strings have neither a common prefix
       
    31     nor a common suffix, (0, 0) is returned.
       
    32     """
       
    33     start = 0
       
    34     limit = min(len(str1), len(str2))
       
    35     while start < limit and str1[start] == str2[start]:
       
    36         start += 1
       
    37     end = -1
       
    38     limit = limit - start
       
    39     while -end <= limit and str1[end] == str2[end]:
       
    40         end -= 1
       
    41     return (start, end + 1)
       
    42 
       
    43 def _get_opcodes(fromlines, tolines, ignore_blank_lines=False,
       
    44                  ignore_case=False, ignore_space_changes=False):
       
    45     """
       
    46     Generator built on top of SequenceMatcher.get_opcodes().
       
    47     
       
    48     This function detects line changes that should be ignored and emits them
       
    49     as tagged as 'equal', possibly joined with the preceding and/or following
       
    50     'equal' block.
       
    51     """
       
    52 
       
    53     def is_ignorable(tag, fromlines, tolines):
       
    54         if tag == 'delete' and ignore_blank_lines:
       
    55             if ''.join(fromlines) == '':
       
    56                 return True
       
    57         elif tag == 'insert' and ignore_blank_lines:
       
    58             if ''.join(tolines) == '':
       
    59                 return True
       
    60         elif tag == 'replace' and (ignore_case or ignore_space_changes):
       
    61             if len(fromlines) != len(tolines):
       
    62                 return False
       
    63             def f(str):
       
    64                 if ignore_case:
       
    65                     str = str.lower()
       
    66                 if ignore_space_changes:
       
    67                     str = ' '.join(str.split())
       
    68                 return str
       
    69             for i in range(len(fromlines)):
       
    70                 if f(fromlines[i]) != f(tolines[i]):
       
    71                     return False
       
    72             return True
       
    73 
       
    74     matcher = SequenceMatcher(None, fromlines, tolines)
       
    75     previous = None
       
    76     for tag, i1, i2, j1, j2 in matcher.get_opcodes():
       
    77         if tag == 'equal':
       
    78             if previous:
       
    79                 previous = (tag, previous[1], i2, previous[3], j2)
       
    80             else:
       
    81                 previous = (tag, i1, i2, j1, j2)
       
    82         else:
       
    83             if is_ignorable(tag, fromlines[i1:i2], tolines[j1:j2]):
       
    84                 if previous:
       
    85                     previous = 'equal', previous[1], i2, previous[3], j2
       
    86                 else:
       
    87                     previous = 'equal', i1, i2, j1, j2
       
    88                 continue
       
    89             if previous:
       
    90                 yield previous
       
    91             yield tag, i1, i2, j1, j2
       
    92             previous = None
       
    93 
       
    94     if previous:
       
    95         yield previous
       
    96 
       
    97 def _group_opcodes(opcodes, n=3):
       
    98     """
       
    99     Python 2.2 doesn't have SequenceMatcher.get_grouped_opcodes(), so let's
       
   100     provide equivalent here. The opcodes parameter can be any iterable or
       
   101     sequence.
       
   102 
       
   103     This function can also be used to generate full-context diffs by passing 
       
   104     None for the parameter n.
       
   105     """
       
   106     # Full context produces all the opcodes
       
   107     if n is None:
       
   108         yield list(opcodes)
       
   109         return
       
   110 
       
   111     # Otherwise we leave at most n lines with the tag 'equal' before and after
       
   112     # every change
       
   113     nn = n + n
       
   114     group = []
       
   115     for idx, (tag, i1, i2, j1, j2) in enumerate(opcodes):
       
   116         if idx == 0 and tag == 'equal': # Fixup leading unchanged block
       
   117             i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
       
   118         elif tag == 'equal' and i2 - i1 > nn:
       
   119             group.append((tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)))
       
   120             yield group
       
   121             group = []
       
   122             i1, j1 = max(i1, i2 - n), max(j1, j2 - n)
       
   123         group.append((tag, i1, i2, j1 ,j2))
       
   124 
       
   125     if group and not (len(group) == 1 and group[0][0] == 'equal'):
       
   126         if group[-1][0] == 'equal': # Fixup trailing unchanged block
       
   127             tag, i1, i2, j1, j2 = group[-1]
       
   128             group[-1] = tag, i1, min(i2, i1 + n), j1, min(j2, j1 + n)
       
   129         yield group
       
   130 
       
   131 def hdf_diff(*args, **kwargs):
       
   132     return diff_blocks(*args, **kwargs)
       
   133 
       
def diff_blocks(fromlines, tolines, context=None, tabwidth=8,
                ignore_blank_lines=0, ignore_case=0, ignore_space_changes=0):
    """Return an array that is adequate for adding to the data dictionary

    See the diff_div.html template.

    `fromlines` and `tolines` are indexed, sliced and assigned to by
    position, so they must be mutable sequences of strings. Note that they
    are modified in place: lines taking part in an equal-length 'replace'
    get '\\0' and '\\1' marker characters inserted around the changed span.

    `context` is handed to `_group_opcodes` as the number of context lines
    (None meaning full context), `tabwidth` is used when expanding tabs and
    the three `ignore_*` flags are forwarded to `_get_opcodes`.

    The result is a list with one entry per opcode group. Each entry is a
    list of dicts of the form::

        {'type': 'mod' | 'rem' | 'add' | 'unmod',
         'base': {'offset': i1, 'lines': [...]},
         'changed': {'offset': j1, 'lines': [...]}}

    where the `lines` are `Markup` objects.
    """

    # Translation from SequenceMatcher tags to the 'type' values emitted.
    type_map = {'replace': 'mod', 'delete': 'rem', 'insert': 'add',
                'equal': 'unmod'}

    # Matches a space followed by at least one more space, or a single
    # space at the very start of the string.
    space_re = re.compile(' ( +)|^ ')
    def htmlify(match):
        # Replace the matched run of spaces by as many characters, pairing
        # '&nbsp;' with a plain space and ending on '&nbsp;' for odd lengths.
        div, mod = divmod(len(match.group(0)), 2)
        return div * '&nbsp; ' + mod * '&nbsp;'

    def markup_intraline_changes(opcodes):
        # Pass the opcodes through unchanged, but for 'replace' opcodes
        # covering the same number of lines on both sides, first mark the
        # changed span of each line pair with '\0' (start) and '\1' (end).
        # This rewrites entries of fromlines/tolines in place.
        for tag, i1, i2, j1, j2 in opcodes:
            if tag == 'replace' and i2 - i1 == j2 - j1:
                for i in range(i2 - i1):
                    fromline, toline = fromlines[i1 + i], tolines[j1 + i]
                    (start, end) = _get_change_extent(fromline, toline)
                    # Only mark lines sharing a common prefix and/or suffix.
                    if start != 0 or end != 0:
                        # `end` is zero or negative; turn it into an
                        # absolute index for each line.
                        last = end+len(fromline)
                        fromlines[i1+i] = fromline[:start] + '\0' + fromline[start:last] + \
                                       '\1' + fromline[last:]
                        last = end+len(toline)
                        tolines[j1+i] = toline[:start] + '\0' + toline[start:last] + \
                                     '\1' + toline[last:]
            yield tag, i1, i2, j1, j2

    changes = []
    opcodes = _get_opcodes(fromlines, tolines, ignore_blank_lines, ignore_case,
                           ignore_space_changes)
    for group in _group_opcodes(opcodes, context):
        blocks = []
        # NOTE(review): last_tag is never reassigned below, so the
        # `tag != last_tag` test holds for every opcode and each opcode
        # starts its own block.
        last_tag = None
        for tag, i1, i2, j1, j2 in markup_intraline_changes(group):
            if tag != last_tag:
                blocks.append({'type': type_map[tag],
                               'base': {'offset': i1, 'lines': []},
                               'changed': {'offset': j1, 'lines': []}})
            if tag == 'equal':
                # Unchanged lines carry no markers: expand tabs, escape,
                # then protect runs of spaces.
                for line in fromlines[i1:i2]:
                    line = line.expandtabs(tabwidth)
                    line = space_re.sub(htmlify, escape(line, quotes=False))
                    blocks[-1]['base']['lines'].append(Markup(unicode(line)))
                for line in tolines[j1:j2]:
                    line = line.expandtabs(tabwidth)
                    line = space_re.sub(htmlify, escape(line, quotes=False))
                    blocks[-1]['changed']['lines'].append(Markup(unicode(line)))
            else:
                if tag in ('replace', 'delete'):
                    for line in fromlines[i1:i2]:
                        # presumably the third argument makes expandtabs
                        # skip the marker characters when counting columns
                        # -- TODO confirm against trac.util.text.expandtabs
                        line = expandtabs(line, tabwidth, '\0\1')
                        line = escape(line, quotes=False)
                        # Split on the start marker so the space handling
                        # is applied per segment, then turn the markers
                        # into <del>...</del>.
                        line = '<del>'.join([space_re.sub(htmlify, seg)
                                             for seg in line.split('\0')])
                        line = line.replace('\1', '</del>')
                        blocks[-1]['base']['lines'].append(
                            Markup(unicode(line)))
                if tag in ('replace', 'insert'):
                    for line in tolines[j1:j2]:
                        line = expandtabs(line, tabwidth, '\0\1')
                        line = escape(line, quotes=False)
                        # Same as above, with <ins>...</ins> on the new side.
                        line = '<ins>'.join([space_re.sub(htmlify, seg)
                                             for seg in line.split('\0')])
                        line = line.replace('\1', '</ins>')
                        blocks[-1]['changed']['lines'].append(
                            Markup(unicode(line)))
        changes.append(blocks)
    return changes
       
   205 
       
   206 def unified_diff(fromlines, tolines, context=None, ignore_blank_lines=0,
       
   207                  ignore_case=0, ignore_space_changes=0):
       
   208     opcodes = _get_opcodes(fromlines, tolines, ignore_blank_lines, ignore_case,
       
   209                            ignore_space_changes)
       
   210     for group in _group_opcodes(opcodes, context):
       
   211         i1, i2, j1, j2 = group[0][1], group[-1][2], group[0][3], group[-1][4]
       
   212         if i1 == 0 and i2 == 0:
       
   213             i1, i2 = -1, -1 # support for 'A'dd changes
       
   214         yield '@@ -%d,%d +%d,%d @@' % (i1 + 1, i2 - i1, j1 + 1, j2 - j1)
       
   215         for tag, i1, i2, j1, j2 in group:
       
   216             if tag == 'equal':
       
   217                 for line in fromlines[i1:i2]:
       
   218                     yield ' ' + line
       
   219             else:
       
   220                 if tag in ('replace', 'delete'):
       
   221                     for line in fromlines[i1:i2]:
       
   222                         yield '-' + line
       
   223                 if tag in ('replace', 'insert'):
       
   224                     for line in tolines[j1:j2]:
       
   225                         yield '+' + line
       
   226 
       
   227 def get_diff_options(req):
       
   228     options_data = {}
       
   229     data = {'options': options_data}
       
   230     
       
   231     def get_bool_option(name, default=0):
       
   232         pref = int(req.session.get('diff_' + name, default))
       
   233         arg = int(req.args.has_key(name))
       
   234         if req.args.has_key('update') and arg != pref:
       
   235             req.session['diff_' + name] = arg
       
   236         else:
       
   237             arg = pref
       
   238         return arg
       
   239 
       
   240     pref = req.session.get('diff_style', 'inline')
       
   241     style = req.args.get('style', pref)
       
   242     if req.args.has_key('update') and style != pref:
       
   243         req.session['diff_style'] = style
       
   244     data['style'] = style
       
   245 
       
   246     pref = int(req.session.get('diff_contextlines', 2))
       
   247     try:
       
   248         arg = int(req.args.get('contextlines', pref))
       
   249     except ValueError:
       
   250         arg = -1
       
   251     if req.args.has_key('update') and arg != pref:
       
   252         req.session['diff_contextlines'] = arg
       
   253     options = ['-U%d' % arg]
       
   254     options_data['contextlines'] = arg
       
   255 
       
   256     arg = get_bool_option('ignoreblanklines')
       
   257     if arg:
       
   258         options.append('-B')
       
   259     options_data['ignoreblanklines'] = arg
       
   260 
       
   261     arg = get_bool_option('ignorecase')
       
   262     if arg:
       
   263         options.append('-i')
       
   264     options_data['ignorecase'] = arg
       
   265 
       
   266     arg = get_bool_option('ignorewhitespace')
       
   267     if arg:
       
   268         options.append('-b')
       
   269     options_data['ignorewhitespace'] = arg
       
   270 
       
   271     return (style, options, data)