src/cm/ext/diff_match_patch.py
     1 #!/usr/bin/python2.4
       
     2 
       
     3 """Diff Match and Patch
       
     4 
       
     5 Copyright 2006 Google Inc.
       
     6 http://code.google.com/p/google-diff-match-patch/
       
     7 
       
     8 Licensed under the Apache License, Version 2.0 (the "License");
       
     9 you may not use this file except in compliance with the License.
       
    10 You may obtain a copy of the License at
       
    11 
       
    12   http://www.apache.org/licenses/LICENSE-2.0
       
    13 
       
    14 Unless required by applicable law or agreed to in writing, software
       
    15 distributed under the License is distributed on an "AS IS" BASIS,
       
    16 WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
       
    17 See the License for the specific language governing permissions and
       
    18 limitations under the License.
       
    19 """
       
    20 
       
    21 """Functions for diff, match and patch.
       
    22 
       
    23 Computes the difference between two texts to create a patch.
       
    24 Applies the patch onto another text, allowing for errors.
       
    25 """
       
    26 
       
    27 __author__ = 'fraser@google.com (Neil Fraser)'
       
    28 
       
    29 import math
       
    30 import time
       
    31 import urllib
       
    32 import re
       
    33 
       
    34 class diff_match_patch:
       
    35   """Class containing the diff, match and patch methods.
       
    36 
       
    37   Also contains the behaviour settings.
       
    38   """
       
    39 
       
    40   def __init__(self):
       
    41     """Inits a diff_match_patch object with default settings.
       
    42     Redefine these in your program to override the defaults.
       
    43     """
       
    44 
       
    45     # Number of seconds to map a diff before giving up (0 for infinity).
       
    46     self.Diff_Timeout = 1.0
       
    47     # Cost of an empty edit operation in terms of edit characters.
       
    48     self.Diff_EditCost = 4
       
    49     # The size beyond which the double-ended diff activates.
       
    50     # Double-ending is twice as fast, but less accurate.
       
    51     self.Diff_DualThreshold = 32
       
    52     # At what point is no match declared (0.0 = perfection, 1.0 = very loose).
       
    53     self.Match_Threshold = 0.5
       
    54     # How far to search for a match (0 = exact location, 1000+ = broad match).
       
    55     # A match this many characters away from the expected location will add
       
    56     # 1.0 to the score (0.0 is a perfect match).
       
    57     self.Match_Distance = 1000
       
    58     # When deleting a large block of text (over ~64 characters), how close does
       
    59     # the contents have to match the expected contents. (0.0 = perfection,
       
    60     # 1.0 = very loose).  Note that Match_Threshold controls how closely the
       
    61     # end points of a delete need to match.
       
    62     self.Patch_DeleteThreshold = 0.5
       
    63     # Chunk size for context length.
       
    64     self.Patch_Margin = 4
       
    65 
       
    66     # How many bits in a number?
       
    67     # Python has no maximum, thus to disable patch splitting set to 0.
       
    68     # However to avoid long patches in certain pathological cases, use 32.
       
    69     # Multiple short patches (using native ints) are much faster than long ones.
       
    70     self.Match_MaxBits = 32
       
    71 
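         # A minimal sketch of overriding these defaults, assuming a hypothetical
         # instance named `dmp`:
         #
         #   dmp = diff_match_patch()
         #   dmp.Diff_Timeout = 0       # 0 means never give up mapping a diff
         #   dmp.Match_Threshold = 0.8  # accept looser fuzzy matches
         #   dmp.Patch_Margin = 8       # keep more context around each patch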
       
    72   #  DIFF FUNCTIONS
       
    73 
       
    74   # The data structure representing a diff is an array of tuples:
       
    75   # [(DIFF_DELETE, "Hello"), (DIFF_INSERT, "Goodbye"), (DIFF_EQUAL, " world.")]
       
    76   # which means: delete "Hello", add "Goodbye" and keep " world."
       
    77   DIFF_DELETE = -1
       
    78   DIFF_INSERT = 1
       
    79   DIFF_EQUAL = 0
       
    80 
       
    81   def diff_main(self, text1, text2, checklines=True):
       
    82     """Find the differences between two texts.  Simplifies the problem by
       
    83       stripping any common prefix or suffix off the texts before diffing.
       
    84 
       
    85     Args:
       
    86       text1: Old string to be diffed.
       
    87       text2: New string to be diffed.
       
    88       checklines: Optional speedup flag.  If present and false, then don't run
       
    89         a line-level diff first to identify the changed areas.
       
    90         Defaults to true, which does a faster, slightly less optimal diff.
       
    91 
       
    92     Returns:
       
    93       Array of changes.
       
    94     """
       
    95 
       
    96     # Check for equality (speedup)
       
    97     if text1 == text2:
       
    98       return [(self.DIFF_EQUAL, text1)]
       
    99 
       
   100     # Trim off common prefix (speedup)
       
   101     commonlength = self.diff_commonPrefix(text1, text2)
       
   102     commonprefix = text1[:commonlength]
       
   103     text1 = text1[commonlength:]
       
   104     text2 = text2[commonlength:]
       
   105 
       
   106     # Trim off common suffix (speedup)
       
   107     commonlength = self.diff_commonSuffix(text1, text2)
       
   108     if commonlength == 0:
       
   109       commonsuffix = ''
       
   110     else:
       
   111       commonsuffix = text1[-commonlength:]
       
   112       text1 = text1[:-commonlength]
       
   113       text2 = text2[:-commonlength]
       
   114 
       
   115     # Compute the diff on the middle block
       
   116     diffs = self.diff_compute(text1, text2, checklines)
       
   117 
       
   118     # Restore the prefix and suffix
       
   119     if commonprefix:
       
   120       diffs[:0] = [(self.DIFF_EQUAL, commonprefix)]
       
   121     if commonsuffix:
       
   122       diffs.append((self.DIFF_EQUAL, commonsuffix))
       
   123     self.diff_cleanupMerge(diffs)
       
   124     return diffs
       
   125 
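         # A minimal usage sketch for diff_main, assuming a hypothetical instance
         # named `dmp`; checklines is passed as False because the inputs are tiny:
         #
         #   dmp = diff_match_patch()
         #   dmp.diff_main("abc", "ab123c", False)
         #   # expected: [(0, "ab"), (1, "123"), (0, "c")]
         #   # i.e. keep "ab", insert "123", keep "c"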
       
   126   def diff_compute(self, text1, text2, checklines):
       
   127     """Find the differences between two texts.  Assumes that the texts do not
       
   128       have any common prefix or suffix.
       
   129 
       
   130     Args:
       
   131       text1: Old string to be diffed.
       
   132       text2: New string to be diffed.
       
   133       checklines: Speedup flag.  If false, then don't run a line-level diff
       
   134         first to identify the changed areas.
       
   135         If true, then run a faster, slightly less optimal diff.
       
   136 
       
   137     Returns:
       
   138       Array of changes.
       
   139     """
       
   140     if not text1:
       
   141       # Just add some text (speedup)
       
   142       return [(self.DIFF_INSERT, text2)]
       
   143 
       
   144     if not text2:
       
   145       # Just delete some text (speedup)
       
   146       return [(self.DIFF_DELETE, text1)]
       
   147 
       
   148     if len(text1) > len(text2):
       
   149       (longtext, shorttext) = (text1, text2)
       
   150     else:
       
   151       (shorttext, longtext) = (text1, text2)
       
   152     i = longtext.find(shorttext)
       
   153     if i != -1:
       
   154       # Shorter text is inside the longer text (speedup)
       
   155       diffs = [(self.DIFF_INSERT, longtext[:i]), (self.DIFF_EQUAL, shorttext),
       
   156                (self.DIFF_INSERT, longtext[i + len(shorttext):])]
       
   157       # Swap insertions for deletions if diff is reversed.
       
   158       if len(text1) > len(text2):
       
   159         diffs[0] = (self.DIFF_DELETE, diffs[0][1])
       
   160         diffs[2] = (self.DIFF_DELETE, diffs[2][1])
       
   161       return diffs
       
   162     longtext = shorttext = None  # Garbage collect.
       
   163 
       
   164     # Check to see if the problem can be split in two.
       
   165     hm = self.diff_halfMatch(text1, text2)
       
   166     if hm:
       
   167       # A half-match was found, sort out the return data.
       
   168       (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
       
   169       # Send both pairs off for separate processing.
       
   170       diffs_a = self.diff_main(text1_a, text2_a, checklines)
       
   171       diffs_b = self.diff_main(text1_b, text2_b, checklines)
       
   172       # Merge the results.
       
   173       return diffs_a + [(self.DIFF_EQUAL, mid_common)] + diffs_b
       
   174 
       
   175     # Perform a real diff.
       
   176     if checklines and (len(text1) < 100 or len(text2) < 100):
       
   177       checklines = False  # Too trivial for the overhead.
       
   178     if checklines:
       
   179       # Scan the text on a line-by-line basis first.
       
   180       (text1, text2, linearray) = self.diff_linesToChars(text1, text2)
       
   181 
       
   182     diffs = self.diff_map(text1, text2)
       
   183     if not diffs:  # No acceptable result.
       
   184       diffs = [(self.DIFF_DELETE, text1), (self.DIFF_INSERT, text2)]
       
   185     if checklines:
       
   186       # Convert the diff back to original text.
       
   187       self.diff_charsToLines(diffs, linearray)
       
   188       # Eliminate freak matches (e.g. blank lines)
       
   189       self.diff_cleanupSemantic(diffs)
       
   190 
       
   191       # Rediff any replacement blocks, this time character-by-character.
       
   192       # Add a dummy entry at the end.
       
   193       diffs.append((self.DIFF_EQUAL, ''))
       
   194       pointer = 0
       
   195       count_delete = 0
       
   196       count_insert = 0
       
   197       text_delete = ''
       
   198       text_insert = ''
       
   199       while pointer < len(diffs):
       
   200         if diffs[pointer][0] == self.DIFF_INSERT:
       
   201           count_insert += 1
       
   202           text_insert += diffs[pointer][1]
       
   203         elif diffs[pointer][0] == self.DIFF_DELETE:
       
   204           count_delete += 1
       
   205           text_delete += diffs[pointer][1]
       
   206         elif diffs[pointer][0] == self.DIFF_EQUAL:
       
   207           # Upon reaching an equality, check for prior redundancies.
       
   208           if count_delete >= 1 and count_insert >= 1:
       
   209             # Delete the offending records and add the merged ones.
       
   210             a = self.diff_main(text_delete, text_insert, False)
       
   211             diffs[pointer - count_delete - count_insert : pointer] = a
       
   212             pointer = pointer - count_delete - count_insert + len(a)
       
   213           count_insert = 0
       
   214           count_delete = 0
       
   215           text_delete = ''
       
   216           text_insert = ''
       
   217 
       
   218         pointer += 1
       
   219 
       
   220       diffs.pop()  # Remove the dummy entry at the end.
       
   221     return diffs
       
   222 
       
   223   def diff_linesToChars(self, text1, text2):
       
   224     """Split two texts into an array of strings.  Reduce the texts to a string
       
   225     of hashes where each Unicode character represents one line.
       
   226 
       
   227     Args:
       
   228       text1: First string.
       
   229       text2: Second string.
       
   230 
       
   231     Returns:
       
   232       Three element tuple, containing the encoded text1, the encoded text2 and
       
   233       the array of unique strings.  The zeroth element of the array of unique
       
   234       strings is intentionally blank.
       
   235     """
       
   236     lineArray = []  # e.g. lineArray[4] == "Hello\n"
       
   237     lineHash = {}   # e.g. lineHash["Hello\n"] == 4
       
   238 
       
   239     # "\x00" is a valid character, but various debuggers don't like it.
       
   240     # So we'll insert a junk entry to avoid generating a null character.
       
   241     lineArray.append('')
       
   242 
       
   243     def diff_linesToCharsMunge(text):
       
   244       """Split a text into an array of strings.  Reduce the texts to a string
       
   245       of hashes where each Unicode character represents one line.
       
   246       Modifies lineArray and lineHash through being a closure.
       
   247 
       
   248       Args:
       
   249         text: String to encode.
       
   250 
       
   251       Returns:
       
   252         Encoded string.
       
   253       """
       
   254       chars = []
       
   255       # Walk the text, pulling out a substring for each line.
       
   256       # text.split('\n') would temporarily double our memory footprint.
       
   257       # Modifying text would create many large strings to garbage collect.
       
   258       lineStart = 0
       
   259       lineEnd = -1
       
   260       while lineEnd < len(text) - 1:
       
   261         lineEnd = text.find('\n', lineStart)
       
   262         if lineEnd == -1:
       
   263           lineEnd = len(text) - 1
       
   264         line = text[lineStart:lineEnd + 1]
       
   265         lineStart = lineEnd + 1
       
   266 
       
   267         if line in lineHash:
       
   268           chars.append(unichr(lineHash[line]))
       
   269         else:
       
   270           lineArray.append(line)
       
   271           lineHash[line] = len(lineArray) - 1
       
   272           chars.append(unichr(len(lineArray) - 1))
       
   273       return "".join(chars)
       
   274 
       
   275     chars1 = diff_linesToCharsMunge(text1)
       
   276     chars2 = diff_linesToCharsMunge(text2)
       
   277     return (chars1, chars2, lineArray)
       
   278 
       
   279   def diff_charsToLines(self, diffs, lineArray):
       
   280     """Rehydrate the text in a diff from a string of line hashes to real lines
       
   281     of text.
       
   282 
       
   283     Args:
       
   284       diffs: Array of diff tuples.
       
   285       lineArray: Array of unique strings.
       
   286     """
       
   287     for x in xrange(len(diffs)):
       
   288       text = []
       
   289       for char in diffs[x][1]:
       
   290         text.append(lineArray[ord(char)])
       
   291       diffs[x] = (diffs[x][0], "".join(text))
       
   292 
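         # A minimal sketch of the line-mode round trip, assuming an instance `dmp`:
         #
         #   chars1, chars2, lineArray = dmp.diff_linesToChars(
         #       "alpha\nbeta\nalpha\n", "beta\nalpha\nbeta\n")
         #   # chars1 == u"\x01\x02\x01", chars2 == u"\x02\x01\x02"
         #   # lineArray == ['', 'alpha\n', 'beta\n']
         #   diffs = [(0, u"\x01"), (-1, u"\x02"), (0, u"\x01")]
         #   dmp.diff_charsToLines(diffs, lineArray)
         #   # diffs == [(0, "alpha\n"), (-1, "beta\n"), (0, "alpha\n")]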
       
   293   def diff_map(self, text1, text2):
       
   294     """Explore the intersection points between the two texts.
       
   295 
       
   296     Args:
       
   297       text1: Old string to be diffed.
       
   298       text2: New string to be diffed.
       
   299 
       
   300     Returns:
       
   301       Array of diff tuples or None if no diff available.
       
   302     """
       
   303 
       
   304     # Unlike in most languages, Python counts time in seconds.
       
   305     s_end = time.time() + self.Diff_Timeout  # Don't run for too long.
       
   306     # Cache the text lengths to prevent multiple calls.
       
   307     text1_length = len(text1)
       
   308     text2_length = len(text2)
       
   309     max_d = text1_length + text2_length - 1
       
   310     doubleEnd = self.Diff_DualThreshold * 2 < max_d
       
   311     v_map1 = []
       
   312     v_map2 = []
       
   313     v1 = {}
       
   314     v2 = {}
       
   315     v1[1] = 0
       
   316     v2[1] = 0
       
   317     footsteps = {}
       
   318     done = False
       
   319     # If the total number of characters is odd, then the front path will
       
   320     # collide with the reverse path.
       
   321     front = (text1_length + text2_length) % 2
       
   322     for d in xrange(max_d):
       
   323       # Bail out if timeout reached.
       
   324       if self.Diff_Timeout > 0 and time.time() > s_end:
       
   325         return None
       
   326 
       
   327       # Walk the front path one step.
       
   328       v_map1.append({})
       
   329       for k in xrange(-d, d + 1, 2):
       
   330         if k == -d or k != d and v1[k - 1] < v1[k + 1]:
       
   331           x = v1[k + 1]
       
   332         else:
       
   333           x = v1[k - 1] + 1
       
   334         y = x - k
       
   335         if doubleEnd:
       
   336           footstep = (x, y)
       
   337           if front and footstep in footsteps:
       
   338             done = True
       
   339           if not front:
       
   340             footsteps[footstep] = d
       
   341 
       
   342         while (not done and x < text1_length and y < text2_length and
       
   343                text1[x] == text2[y]):
       
   344           x += 1
       
   345           y += 1
       
   346           if doubleEnd:
       
   347             footstep = (x, y)
       
   348             if front and footstep in footsteps:
       
   349               done = True
       
   350             if not front:
       
   351               footsteps[footstep] = d
       
   352 
       
   353         v1[k] = x
       
   354         v_map1[d][(x, y)] = True
       
   355         if x == text1_length and y == text2_length:
       
   356           # Reached the end in single-path mode.
       
   357           return self.diff_path1(v_map1, text1, text2)
       
   358         elif done:
       
   359           # Front path ran over reverse path.
       
   360           v_map2 = v_map2[:footsteps[footstep] + 1]
       
   361           a = self.diff_path1(v_map1, text1[:x], text2[:y])
       
   362           b = self.diff_path2(v_map2, text1[x:], text2[y:])
       
   363           return a + b
       
   364 
       
   365       if doubleEnd:
       
   366         # Walk the reverse path one step.
       
   367         v_map2.append({})
       
   368         for k in xrange(-d, d + 1, 2):
       
   369           if k == -d or k != d and v2[k - 1] < v2[k + 1]:
       
   370             x = v2[k + 1]
       
   371           else:
       
   372             x = v2[k - 1] + 1
       
   373           y = x - k
       
   374           footstep = (text1_length - x, text2_length - y)
       
   375           if not front and footstep in footsteps:
       
   376             done = True
       
   377           if front:
       
   378             footsteps[footstep] = d
       
   379           while (not done and x < text1_length and y < text2_length and
       
   380                  text1[-x - 1] == text2[-y - 1]):
       
   381             x += 1
       
   382             y += 1
       
   383             footstep = (text1_length - x, text2_length - y)
       
   384             if not front and footstep in footsteps:
       
   385               done = True
       
   386             if front:
       
   387               footsteps[footstep] = d
       
   388 
       
   389           v2[k] = x
       
   390           v_map2[d][(x, y)] = True
       
   391           if done:
       
   392             # Reverse path ran over front path.
       
   393             v_map1 = v_map1[:footsteps[footstep] + 1]
       
   394             a = self.diff_path1(v_map1, text1[:text1_length - x],
       
   395                                 text2[:text2_length - y])
       
   396             b = self.diff_path2(v_map2, text1[text1_length - x:],
       
   397                                 text2[text2_length - y:])
       
   398             return a + b
       
   399 
       
   400     # Number of diffs equals number of characters, no commonality at all.
       
   401     return None
       
   402 
       
   403   def diff_path1(self, v_map, text1, text2):
       
   404     """Work from the middle back to the start to determine the path.
       
   405 
       
   406     Args:
       
   407       v_map: Array of paths.
       
   408       text1: Old string fragment to be diffed.
       
   409       text2: New string fragment to be diffed.
       
   410 
       
   411     Returns:
       
   412       Array of diff tuples.
       
   413     """
       
   414     path = []
       
   415     x = len(text1)
       
   416     y = len(text2)
       
   417     last_op = None
       
   418     for d in xrange(len(v_map) - 2, -1, -1):
       
   419       while True:
       
   420         if (x - 1, y) in v_map[d]:
       
   421           x -= 1
       
   422           if last_op == self.DIFF_DELETE:
       
   423             path[0] = (self.DIFF_DELETE, text1[x] + path[0][1])
       
   424           else:
       
   425             path[:0] = [(self.DIFF_DELETE, text1[x])]
       
   426           last_op = self.DIFF_DELETE
       
   427           break
       
   428         elif (x, y - 1) in v_map[d]:
       
   429           y -= 1
       
   430           if last_op == self.DIFF_INSERT:
       
   431             path[0] = (self.DIFF_INSERT, text2[y] + path[0][1])
       
   432           else:
       
   433             path[:0] = [(self.DIFF_INSERT, text2[y])]
       
   434           last_op = self.DIFF_INSERT
       
   435           break
       
   436         else:
       
   437           x -= 1
       
   438           y -= 1
       
   439           assert text1[x] == text2[y], ("No diagonal.  " +
       
   440               "Can't happen. (diff_path1)")
       
   441           if last_op == self.DIFF_EQUAL:
       
   442             path[0] = (self.DIFF_EQUAL, text1[x] + path[0][1])
       
   443           else:
       
   444             path[:0] = [(self.DIFF_EQUAL, text1[x])]
       
   445           last_op = self.DIFF_EQUAL
       
   446     return path
       
   447 
       
   448   def diff_path2(self, v_map, text1, text2):
       
   449     """Work from the middle back to the end to determine the path.
       
   450 
       
   451     Args:
       
   452       v_map: Array of paths.
       
   453       text1: Old string fragment to be diffed.
       
   454       text2: New string fragment to be diffed.
       
   455 
       
   456     Returns:
       
   457       Array of diff tuples.
       
   458     """
       
   459     path = []
       
   460     x = len(text1)
       
   461     y = len(text2)
       
   462     last_op = None
       
   463     for d in xrange(len(v_map) - 2, -1, -1):
       
   464       while True:
       
   465         if (x - 1, y) in v_map[d]:
       
   466           x -= 1
       
   467           if last_op == self.DIFF_DELETE:
       
   468             path[-1] = (self.DIFF_DELETE, path[-1][1] + text1[-x - 1])
       
   469           else:
       
   470             path.append((self.DIFF_DELETE, text1[-x - 1]))
       
   471           last_op = self.DIFF_DELETE
       
   472           break
       
   473         elif (x, y - 1) in v_map[d]:
       
   474           y -= 1
       
   475           if last_op == self.DIFF_INSERT:
       
   476             path[-1] = (self.DIFF_INSERT, path[-1][1] + text2[-y - 1])
       
   477           else:
       
   478             path.append((self.DIFF_INSERT, text2[-y - 1]))
       
   479           last_op = self.DIFF_INSERT
       
   480           break
       
   481         else:
       
   482           x -= 1
       
   483           y -= 1
       
   484           assert text1[-x - 1] == text2[-y - 1], ("No diagonal.  " +
       
   485               "Can't happen. (diff_path2)")
       
   486           if last_op == self.DIFF_EQUAL:
       
   487             path[-1] = (self.DIFF_EQUAL, path[-1][1] + text1[-x - 1])
       
   488           else:
       
   489             path.append((self.DIFF_EQUAL, text1[-x - 1]))
       
   490           last_op = self.DIFF_EQUAL
       
   491     return path
       
   492 
       
   493   def diff_commonPrefix(self, text1, text2):
       
   494     """Determine the common prefix of two strings.
       
   495 
       
   496     Args:
       
   497       text1: First string.
       
   498       text2: Second string.
       
   499 
       
   500     Returns:
       
   501       The number of characters common to the start of each string.
       
   502     """
       
   503     # Quick check for common null cases.
       
   504     if not text1 or not text2 or text1[0] != text2[0]:
       
   505       return 0
       
   506     # Binary search.
       
   507     # Performance analysis: http://neil.fraser.name/news/2007/10/09/
       
   508     pointermin = 0
       
   509     pointermax = min(len(text1), len(text2))
       
   510     pointermid = pointermax
       
   511     pointerstart = 0
       
   512     while pointermin < pointermid:
       
   513       if text1[pointerstart:pointermid] == text2[pointerstart:pointermid]:
       
   514         pointermin = pointermid
       
   515         pointerstart = pointermin
       
   516       else:
       
   517         pointermax = pointermid
       
   518       pointermid = int((pointermax - pointermin) / 2 + pointermin)
       
   519     return pointermid
       
   520 
       
   521   def diff_commonSuffix(self, text1, text2):
       
   522     """Determine the common suffix of two strings.
       
   523 
       
   524     Args:
       
   525       text1: First string.
       
   526       text2: Second string.
       
   527 
       
   528     Returns:
       
   529       The number of characters common to the end of each string.
       
   530     """
       
   531     # Quick check for common null cases.
       
   532     if not text1 or not text2 or text1[-1] != text2[-1]:
       
   533       return 0
       
   534     # Binary search.
       
   535     # Performance analysis: http://neil.fraser.name/news/2007/10/09/
       
   536     pointermin = 0
       
   537     pointermax = min(len(text1), len(text2))
       
   538     pointermid = pointermax
       
   539     pointerend = 0
       
   540     while pointermin < pointermid:
       
   541       if (text1[-pointermid:len(text1) - pointerend] ==
       
   542           text2[-pointermid:len(text2) - pointerend]):
       
   543         pointermin = pointermid
       
   544         pointerend = pointermin
       
   545       else:
       
   546         pointermax = pointermid
       
   547       pointermid = int((pointermax - pointermin) / 2 + pointermin)
       
   548     return pointermid
       
   549 
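         # A minimal sketch, assuming an instance `dmp`; both helpers return a
         # length, not the shared substring itself:
         #
         #   dmp.diff_commonPrefix("1234abcdef", "1234xyz")   # -> 4  ("1234")
         #   dmp.diff_commonSuffix("abcdef1234", "xyz1234")   # -> 4  ("1234")
         #   dmp.diff_commonPrefix("abc", "xyz")              # -> 0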
       
   550   def diff_halfMatch(self, text1, text2):
       
   551     """Do the two texts share a substring which is at least half the length of
       
   552     the longer text?
       
   553 
       
   554     Args:
       
   555       text1: First string.
       
   556       text2: Second string.
       
   557 
       
   558     Returns:
       
   559       Five element Array, containing the prefix of text1, the suffix of text1,
       
   560       the prefix of text2, the suffix of text2 and the common middle.  Or None
       
   561       if there was no match.
       
   562     """
       
   563     if len(text1) > len(text2):
       
   564       (longtext, shorttext) = (text1, text2)
       
   565     else:
       
   566       (shorttext, longtext) = (text1, text2)
       
   567     if len(longtext) < 10 or len(shorttext) < 1:
       
   568       return None  # Pointless.
       
   569 
       
   570     def diff_halfMatchI(longtext, shorttext, i):
       
   571       """Does a substring of shorttext exist within longtext such that the
       
   572       substring is at least half the length of longtext?
       
   573       Closure, but does not reference any external variables.
       
   574 
       
   575       Args:
       
   576         longtext: Longer string.
       
   577         shorttext: Shorter string.
       
   578         i: Start index of quarter length substring within longtext.
       
   579 
       
   580       Returns:
       
   581         Five element Array, containing the prefix of longtext, the suffix of
       
   582         longtext, the prefix of shorttext, the suffix of shorttext and the
       
   583         common middle.  Or None if there was no match.
       
   584       """
       
   585       seed = longtext[i:i + len(longtext) / 4]
       
   586       best_common = ''
       
   587       j = shorttext.find(seed)
       
   588       while j != -1:
       
   589         prefixLength = self.diff_commonPrefix(longtext[i:], shorttext[j:])
       
   590         suffixLength = self.diff_commonSuffix(longtext[:i], shorttext[:j])
       
   591         if len(best_common) < suffixLength + prefixLength:
       
   592           best_common = (shorttext[j - suffixLength:j] +
       
   593               shorttext[j:j + prefixLength])
       
   594           best_longtext_a = longtext[:i - suffixLength]
       
   595           best_longtext_b = longtext[i + prefixLength:]
       
   596           best_shorttext_a = shorttext[:j - suffixLength]
       
   597           best_shorttext_b = shorttext[j + prefixLength:]
       
   598         j = shorttext.find(seed, j + 1)
       
   599 
       
   600       if len(best_common) >= len(longtext) / 2:
       
   601         return (best_longtext_a, best_longtext_b,
       
   602                 best_shorttext_a, best_shorttext_b, best_common)
       
   603       else:
       
   604         return None
       
   605 
       
   606     # First check if the second quarter is the seed for a half-match.
       
   607     hm1 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 3) / 4)
       
   608     # Check again based on the third quarter.
       
   609     hm2 = diff_halfMatchI(longtext, shorttext, (len(longtext) + 1) / 2)
       
   610     if not hm1 and not hm2:
       
   611       return None
       
   612     elif not hm2:
       
   613       hm = hm1
       
   614     elif not hm1:
       
   615       hm = hm2
       
   616     else:
       
   617       # Both matched.  Select the longest.
       
   618       if len(hm1[4]) > len(hm2[4]):
       
   619         hm = hm1
       
   620       else:
       
   621         hm = hm2
       
   622 
       
   623     # A half-match was found, sort out the return data.
       
   624     if len(text1) > len(text2):
       
   625       (text1_a, text1_b, text2_a, text2_b, mid_common) = hm
       
   626     else:
       
   627       (text2_a, text2_b, text1_a, text1_b, mid_common) = hm
       
   628     return (text1_a, text1_b, text2_a, text2_b, mid_common)
       
   629 
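         # A minimal sketch, assuming an instance `dmp`; a successful half-match
         # splits both texts around a shared middle at least half as long as the
         # longer text:
         #
         #   dmp.diff_halfMatch("1234567890", "a345678z")
         #   # expected: ('12', '90', 'a', 'z', '345678')
         #   dmp.diff_halfMatch("1234567890", "abcdef")
         #   # expected: None (no sufficiently long shared substring)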
       
   630   def diff_cleanupSemantic(self, diffs):
       
   631     """Reduce the number of edits by eliminating semantically trivial
       
   632     equalities.
       
   633 
       
   634     Args:
       
   635       diffs: Array of diff tuples.
       
   636     """
       
   637     changes = False
       
   638     equalities = []  # Stack of indices where equalities are found.
       
   639     lastequality = None  # Always equal to diffs[equalities[-1]][1]
       
   640     pointer = 0  # Index of current position.
       
   641     length_changes1 = 0  # Number of chars that changed prior to the equality.
       
   642     length_changes2 = 0  # Number of chars that changed after the equality.
       
   643     while pointer < len(diffs):
       
   644       if diffs[pointer][0] == self.DIFF_EQUAL:  # equality found
       
   645         equalities.append(pointer)
       
   646         length_changes1 = length_changes2
       
   647         length_changes2 = 0
       
   648         lastequality = diffs[pointer][1]
       
   649       else:  # an insertion or deletion
       
   650         length_changes2 += len(diffs[pointer][1])
       
   651         if (lastequality != None and (len(lastequality) <= length_changes1) and
       
   652             (len(lastequality) <= length_changes2)):
       
   653           # Duplicate record
       
   654           diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
       
   655           # Change second copy to insert.
       
   656           diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
       
   657               diffs[equalities[-1] + 1][1])
       
   658           # Throw away the equality we just deleted.
       
   659           equalities.pop()
       
   660           # Throw away the previous equality (it needs to be reevaluated).
       
   661           if len(equalities) != 0:
       
   662             equalities.pop()
       
   663           if len(equalities):
       
   664             pointer = equalities[-1]
       
   665           else:
       
   666             pointer = -1
       
   667           length_changes1 = 0  # Reset the counters.
       
   668           length_changes2 = 0
       
   669           lastequality = None
       
   670           changes = True
       
   671       pointer += 1
       
   672 
       
   673     if changes:
       
   674       self.diff_cleanupMerge(diffs)
       
   675 
       
   676     self.diff_cleanupSemanticLossless(diffs)
       
   677 
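         # A minimal sketch, assuming an instance `dmp`; an equality no longer than
         # the edits on either side of it is folded into those edits:
         #
         #   diffs = [(-1, "a"), (0, "b"), (-1, "c")]
         #   dmp.diff_cleanupSemantic(diffs)
         #   # expected: diffs == [(-1, "abc"), (1, "b")]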
       
   678   def diff_cleanupSemanticLossless(self, diffs):
       
   679     """Look for single edits surrounded on both sides by equalities
       
   680     which can be shifted sideways to align the edit to a word boundary.
       
   681     e.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
       
   682 
       
   683     Args:
       
   684       diffs: Array of diff tuples.
       
   685     """
       
   686 
       
   687     def diff_cleanupSemanticScore(one, two):
       
   688       """Given two strings, compute a score representing whether the
       
   689       internal boundary falls on logical boundaries.
       
   690       Scores range from 5 (best) to 0 (worst).
       
   691       Closure, but does not reference any external variables.
       
   692 
       
   693       Args:
       
   694         one: First string.
       
   695         two: Second string.
       
   696 
       
   697       Returns:
       
   698         The score.
       
   699       """
       
   700       if not one or not two:
       
   701         # Edges are the best.
       
   702         return 5
       
   703 
       
   704       # Each port of this function behaves slightly differently due to
       
   705       # subtle differences in each language's definition of things like
       
   706       # 'whitespace'.  Since this function's purpose is largely cosmetic,
       
   707       # the choice has been made to use each language's native features
       
   708       # rather than force total conformity.
       
   709       score = 0
       
   710       # One point for non-alphanumeric.
       
   711       if not one[-1].isalnum() or not two[0].isalnum():
       
   712         score += 1
       
   713         # Two points for whitespace.
       
   714         if one[-1].isspace() or two[0].isspace():
       
   715           score += 1
       
   716           # Three points for line breaks.
       
   717           if (one[-1] == "\r" or one[-1] == "\n" or
       
   718               two[0] == "\r" or two[0] == "\n"):
       
   719             score += 1
       
   720             # Four points for blank lines.
       
   721             if (re.search("\\n\\r?\\n$", one) or
       
   722                 re.match("^\\r?\\n\\r?\\n", two)):
       
   723               score += 1
       
   724       return score
       
   725 
       
   726     pointer = 1
       
   727     # Intentionally ignore the first and last element (don't need checking).
       
   728     while pointer < len(diffs) - 1:
       
   729       if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
       
   730           diffs[pointer + 1][0] == self.DIFF_EQUAL):
       
   731         # This is a single edit surrounded by equalities.
       
   732         equality1 = diffs[pointer - 1][1]
       
   733         edit = diffs[pointer][1]
       
   734         equality2 = diffs[pointer + 1][1]
       
   735 
       
   736         # First, shift the edit as far left as possible.
       
   737         commonOffset = self.diff_commonSuffix(equality1, edit)
       
   738         if commonOffset:
       
   739           commonString = edit[-commonOffset:]
       
   740           equality1 = equality1[:-commonOffset]
       
   741           edit = commonString + edit[:-commonOffset]
       
   742           equality2 = commonString + equality2
       
   743 
       
   744         # Second, step character by character right, looking for the best fit.
       
   745         bestEquality1 = equality1
       
   746         bestEdit = edit
       
   747         bestEquality2 = equality2
       
   748         bestScore = (diff_cleanupSemanticScore(equality1, edit) +
       
   749             diff_cleanupSemanticScore(edit, equality2))
       
   750         while edit and equality2 and edit[0] == equality2[0]:
       
   751           equality1 += edit[0]
       
   752           edit = edit[1:] + equality2[0]
       
   753           equality2 = equality2[1:]
       
   754           score = (diff_cleanupSemanticScore(equality1, edit) +
       
   755               diff_cleanupSemanticScore(edit, equality2))
       
   756           # The >= encourages trailing rather than leading whitespace on edits.
       
   757           if score >= bestScore:
       
   758             bestScore = score
       
   759             bestEquality1 = equality1
       
   760             bestEdit = edit
       
   761             bestEquality2 = equality2
       
   762 
       
   763         if diffs[pointer - 1][1] != bestEquality1:
       
   764           # We have an improvement, save it back to the diff.
       
   765           if bestEquality1:
       
   766             diffs[pointer - 1] = (diffs[pointer - 1][0], bestEquality1)
       
   767           else:
       
   768             del diffs[pointer - 1]
       
   769             pointer -= 1
       
   770           diffs[pointer] = (diffs[pointer][0], bestEdit)
       
   771           if bestEquality2:
       
   772             diffs[pointer + 1] = (diffs[pointer + 1][0], bestEquality2)
       
   773           else:
       
   774             del diffs[pointer + 1]
       
   775             pointer -= 1
       
   776       pointer += 1
       
   777 
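         # A minimal sketch of the docstring's example, expressed as diff tuples and
         # assuming an instance `dmp`:
         #
         #   diffs = [(0, "The c"), (1, "at c"), (0, "ame.")]
         #   dmp.diff_cleanupSemanticLossless(diffs)
         #   # expected: diffs == [(0, "The "), (1, "cat "), (0, "came.")]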
       
   778   def diff_cleanupEfficiency(self, diffs):
       
   779     """Reduce the number of edits by eliminating operationally trivial
       
   780     equalities.
       
   781 
       
   782     Args:
       
   783       diffs: Array of diff tuples.
       
   784     """
       
   785     changes = False
       
   786     equalities = []  # Stack of indices where equalities are found.
       
   787     lastequality = ''  # Always equal to diffs[equalities[-1]][1]
       
   788     pointer = 0  # Index of current position.
       
   789     pre_ins = False  # Is there an insertion operation before the last equality.
       
   790     pre_del = False  # Is there a deletion operation before the last equality.
       
   791     post_ins = False  # Is there an insertion operation after the last equality.
       
   792     post_del = False  # Is there a deletion operation after the last equality.
       
   793     while pointer < len(diffs):
       
   794       if diffs[pointer][0] == self.DIFF_EQUAL:  # equality found
       
   795         if (len(diffs[pointer][1]) < self.Diff_EditCost and
       
   796             (post_ins or post_del)):
       
   797           # Candidate found.
       
   798           equalities.append(pointer)
       
   799           pre_ins = post_ins
       
   800           pre_del = post_del
       
   801           lastequality = diffs[pointer][1]
       
   802         else:
       
   803           # Not a candidate, and can never become one.
       
   804           equalities = []
       
   805           lastequality = ''
       
   806 
       
   807         post_ins = post_del = False
       
   808       else:  # an insertion or deletion
       
   809         if diffs[pointer][0] == self.DIFF_DELETE:
       
   810           post_del = True
       
   811         else:
       
   812           post_ins = True
       
   813 
       
   814         # Five types to be split:
       
   815         # <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
       
   816         # <ins>A</ins>X<ins>C</ins><del>D</del>
       
   817         # <ins>A</ins><del>B</del>X<ins>C</ins>
       
   818         # <del>A</del>X<ins>C</ins><del>D</del>
       
   819         # <ins>A</ins><del>B</del>X<del>C</del>
       
   820 
       
   821         if lastequality and ((pre_ins and pre_del and post_ins and post_del) or
       
   822                              ((len(lastequality) < self.Diff_EditCost / 2) and
       
   823                               (pre_ins + pre_del + post_ins + post_del) == 3)):
       
   824           # Duplicate record
       
   825           diffs.insert(equalities[-1], (self.DIFF_DELETE, lastequality))
       
   826           # Change second copy to insert.
       
   827           diffs[equalities[-1] + 1] = (self.DIFF_INSERT,
       
   828               diffs[equalities[-1] + 1][1])
       
   829           equalities.pop()  # Throw away the equality we just deleted
       
   830           lastequality = ''
       
   831           if pre_ins and pre_del:
       
   832             # No changes made which could affect previous entry, keep going.
       
   833             post_ins = post_del = True
       
   834             equalities = []
       
   835           else:
       
   836             if len(equalities):
       
   837               equalities.pop()  # Throw away the previous equality
       
   838             if len(equalities):
       
   839               pointer = equalities[-1]
       
   840             else:
       
   841               pointer = -1
       
   842             post_ins = post_del = False
       
   843           changes = True
       
   844       pointer += 1
       
   845 
       
   846     if changes:
       
   847       self.diff_cleanupMerge(diffs)
       
   848 
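         # A minimal sketch, assuming an instance `dmp` with the default
         # Diff_EditCost of 4; a short equality wedged between edits on both sides
         # is absorbed:
         #
         #   diffs = [(1, "ab"), (-1, "cd"), (0, "12"), (1, "ef"), (-1, "34")]
         #   dmp.diff_cleanupEfficiency(diffs)
         #   # expected: diffs == [(-1, "cd1234"), (1, "ab12ef")]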
       
   849   def diff_cleanupMerge(self, diffs):
       
   850     """Reorder and merge like edit sections.  Merge equalities.
       
   851     Any edit section can move as long as it doesn't cross an equality.
       
   852 
       
   853     Args:
       
   854       diffs: Array of diff tuples.
       
   855     """
       
   856     diffs.append((self.DIFF_EQUAL, ''))  # Add a dummy entry at the end.
       
   857     pointer = 0
       
   858     count_delete = 0
       
   859     count_insert = 0
       
   860     text_delete = ''
       
   861     text_insert = ''
       
   862     while pointer < len(diffs):
       
   863       if diffs[pointer][0] == self.DIFF_INSERT:
       
   864         count_insert += 1
       
   865         text_insert += diffs[pointer][1]
       
   866         pointer += 1
       
   867       elif diffs[pointer][0] == self.DIFF_DELETE:
       
   868         count_delete += 1
       
   869         text_delete += diffs[pointer][1]
       
   870         pointer += 1
       
   871       elif diffs[pointer][0] == self.DIFF_EQUAL:
       
   872         # Upon reaching an equality, check for prior redundancies.
       
   873         if count_delete != 0 or count_insert != 0:
       
   874           if count_delete != 0 and count_insert != 0:
       
   875             # Factor out any common prefixes.
       
   876             commonlength = self.diff_commonPrefix(text_insert, text_delete)
       
   877             if commonlength != 0:
       
   878               x = pointer - count_delete - count_insert - 1
       
   879               if x >= 0 and diffs[x][0] == self.DIFF_EQUAL:
       
   880                 diffs[x] = (diffs[x][0], diffs[x][1] +
       
   881                             text_insert[:commonlength])
       
   882               else:
       
   883                 diffs.insert(0, (self.DIFF_EQUAL, text_insert[:commonlength]))
       
   884                 pointer += 1
       
   885               text_insert = text_insert[commonlength:]
       
   886               text_delete = text_delete[commonlength:]
       
   887             # Factor out any common suffixes.
       
   888             commonlength = self.diff_commonSuffix(text_insert, text_delete)
       
   889             if commonlength != 0:
       
   890               diffs[pointer] = (diffs[pointer][0], text_insert[-commonlength:] +
       
   891                   diffs[pointer][1])
       
   892               text_insert = text_insert[:-commonlength]
       
   893               text_delete = text_delete[:-commonlength]
       
   894           # Delete the offending records and add the merged ones.
       
   895           if count_delete == 0:
       
   896             diffs[pointer - count_insert : pointer] = [
       
   897                 (self.DIFF_INSERT, text_insert)]
       
   898           elif count_insert == 0:
       
   899             diffs[pointer - count_delete : pointer] = [
       
   900                 (self.DIFF_DELETE, text_delete)]
       
   901           else:
       
   902             diffs[pointer - count_delete - count_insert : pointer] = [
       
   903                 (self.DIFF_DELETE, text_delete),
       
   904                 (self.DIFF_INSERT, text_insert)]
       
   905           pointer = pointer - count_delete - count_insert + 1
       
   906           if count_delete != 0:
       
   907             pointer += 1
       
   908           if count_insert != 0:
       
   909             pointer += 1
       
   910         elif pointer != 0 and diffs[pointer - 1][0] == self.DIFF_EQUAL:
       
   911           # Merge this equality with the previous one.
       
   912           diffs[pointer - 1] = (diffs[pointer - 1][0],
       
   913                                 diffs[pointer - 1][1] + diffs[pointer][1])
       
   914           del diffs[pointer]
       
   915         else:
       
   916           pointer += 1
       
   917 
       
   918         count_insert = 0
       
   919         count_delete = 0
       
   920         text_delete = ''
       
   921         text_insert = ''
       
   922 
       
   923     if diffs[-1][1] == '':
       
   924       diffs.pop()  # Remove the dummy entry at the end.
       
   925 
       
   926     # Second pass: look for single edits surrounded on both sides by equalities
       
   927     # which can be shifted sideways to eliminate an equality.
       
   928     # e.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
       
   929     changes = False
       
   930     pointer = 1
       
   931     # Intentionally ignore the first and last element (don't need checking).
       
   932     while pointer < len(diffs) - 1:
       
   933       if (diffs[pointer - 1][0] == self.DIFF_EQUAL and
       
   934           diffs[pointer + 1][0] == self.DIFF_EQUAL):
       
   935         # This is a single edit surrounded by equalities.
       
   936         if diffs[pointer][1].endswith(diffs[pointer - 1][1]):
       
   937           # Shift the edit over the previous equality.
       
   938           diffs[pointer] = (diffs[pointer][0],
       
   939               diffs[pointer - 1][1] +
       
   940               diffs[pointer][1][:-len(diffs[pointer - 1][1])])
       
   941           diffs[pointer + 1] = (diffs[pointer + 1][0],
       
   942                                 diffs[pointer - 1][1] + diffs[pointer + 1][1])
       
   943           del diffs[pointer - 1]
       
   944           changes = True
       
   945         elif diffs[pointer][1].startswith(diffs[pointer + 1][1]):
       
   946           # Shift the edit over the next equality.
       
   947           diffs[pointer - 1] = (diffs[pointer - 1][0],
       
   948                                 diffs[pointer - 1][1] + diffs[pointer + 1][1])
       
   949           diffs[pointer] = (diffs[pointer][0],
       
   950               diffs[pointer][1][len(diffs[pointer + 1][1]):] +
       
   951               diffs[pointer + 1][1])
       
   952           del diffs[pointer + 1]
       
   953           changes = True
       
   954       pointer += 1
       
   955 
       
   956     # If shifts were made, the diff needs reordering and another shift sweep.
       
   957     if changes:
       
   958       self.diff_cleanupMerge(diffs)
       
   959 
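         # A minimal sketch, assuming an instance `dmp`; merging also factors common
         # prefixes and suffixes out of paired insertions and deletions:
         #
         #   diffs = [(-1, "a"), (1, "abc"), (-1, "dc")]
         #   dmp.diff_cleanupMerge(diffs)
         #   # expected: diffs == [(0, "a"), (-1, "d"), (1, "b"), (0, "c")]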
       
   960   def diff_xIndex(self, diffs, loc):
       
   961     """loc is a location in text1, compute and return the equivalent location
       
   962     in text2.  e.g. "The cat" vs "The big cat", 1->1, 5->8
       
   963 
       
   964     Args:
       
   965       diffs: Array of diff tuples.
       
   966       loc: Location within text1.
       
   967 
       
   968     Returns:
       
   969       Location within text2.
       
   970     """
       
   971     chars1 = 0
       
   972     chars2 = 0
       
   973     last_chars1 = 0
       
   974     last_chars2 = 0
       
   975     for x in xrange(len(diffs)):
       
   976       (op, text) = diffs[x]
       
   977       if op != self.DIFF_INSERT:  # Equality or deletion.
       
   978         chars1 += len(text)
       
   979       if op != self.DIFF_DELETE:  # Equality or insertion.
       
   980         chars2 += len(text)
       
   981       if chars1 > loc:  # Overshot the location.
       
   982         break
       
   983       last_chars1 = chars1
       
   984       last_chars2 = chars2
       
   985 
       
   986     if len(diffs) != x and diffs[x][0] == self.DIFF_DELETE:
       
   987       # The location was deleted.
       
   988       return last_chars2
       
   989     # Add the remaining character length.
       
   990     return last_chars2 + (loc - last_chars1)
       
   991 
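         # A minimal sketch, assuming an instance `dmp`:
         #
         #   diffs = [(-1, "a"), (1, "1234"), (0, "xyz")]   # "axyz" -> "1234xyz"
         #   dmp.diff_xIndex(diffs, 2)   # expected: 5
         #   diffs = [(0, "a"), (-1, "1234"), (0, "xyz")]   # "a1234xyz" -> "axyz"
         #   dmp.diff_xIndex(diffs, 3)   # expected: 1 (location 3 was deleted)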
       
   992   def diff_prettyHtml(self, diffs):
       
   993     """Convert a diff array into a pretty HTML report.
       
   994 
       
   995     Args:
       
   996       diffs: Array of diff tuples.
       
   997 
       
   998     Returns:
       
   999       HTML representation.
       
  1000     """
       
  1001     html = []
       
  1002     i = 0
       
  1003     for (op, data) in diffs:
       
  1004       text = (data.replace("&", "&amp;").replace("<", "&lt;")
       
  1005                  .replace(">", "&gt;").replace("\n", "&para;<BR>"))
       
  1006       if op == self.DIFF_INSERT:
       
  1007         html.append("<INS STYLE=\"background:#E6FFE6;\" TITLE=\"i=%i\">%s</INS>"
       
  1008             % (i, text))
       
  1009       elif op == self.DIFF_DELETE:
       
  1010         html.append("<DEL STYLE=\"background:#FFE6E6;\" TITLE=\"i=%i\">%s</DEL>"
       
  1011             % (i, text))
       
  1012       elif op == self.DIFF_EQUAL:
       
  1013         html.append("<SPAN TITLE=\"i=%i\">%s</SPAN>" % (i, text))
       
  1014       if op != self.DIFF_DELETE:
       
  1015         i += len(data)
       
  1016     return "".join(html)
       
  1017 
       
  1018   def diff_text1(self, diffs):
       
  1019     """Compute and return the source text (all equalities and deletions).
       
  1020 
       
  1021     Args:
       
  1022       diffs: Array of diff tuples.
       
  1023 
       
  1024     Returns:
       
  1025       Source text.
       
  1026     """
       
  1027     text = []
       
  1028     for (op, data) in diffs:
       
  1029       if op != self.DIFF_INSERT:
       
  1030         text.append(data)
       
  1031     return "".join(text)
       
  1032 
       
  1033   def diff_text2(self, diffs):
       
  1034     """Compute and return the destination text (all equalities and insertions).
       
  1035 
       
  1036     Args:
       
  1037       diffs: Array of diff tuples.
       
  1038 
       
  1039     Returns:
       
  1040       Destination text.
       
  1041     """
       
  1042     text = []
       
  1043     for (op, data) in diffs:
       
  1044       if op != self.DIFF_DELETE:
       
  1045         text.append(data)
       
  1046     return "".join(text)
       
  1047 
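         # A minimal sketch, assuming an instance `dmp`:
         #
         #   diffs = [(0, "jump"), (-1, "s"), (1, "ed")]
         #   dmp.diff_text1(diffs)   # -> "jumps"
         #   dmp.diff_text2(diffs)   # -> "jumped"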
       
  1048   def diff_levenshtein(self, diffs):
       
  1049     """Compute the Levenshtein distance; the number of inserted, deleted or
       
  1050     substituted characters.
       
  1051 
       
  1052     Args:
       
  1053       diffs: Array of diff tuples.
       
  1054 
       
  1055     Returns:
       
  1056       Number of changes.
       
  1057     """
       
  1058     levenshtein = 0
       
  1059     insertions = 0
       
  1060     deletions = 0
       
  1061     for (op, data) in diffs:
       
  1062       if op == self.DIFF_INSERT:
       
  1063         insertions += len(data)
       
  1064       elif op == self.DIFF_DELETE:
       
  1065         deletions += len(data)
       
  1066       elif op == self.DIFF_EQUAL:
       
  1067         # A deletion and an insertion is one substitution.
       
  1068         levenshtein += max(insertions, deletions)
       
  1069         insertions = 0
       
  1070         deletions = 0
       
  1071     levenshtein += max(insertions, deletions)
       
  1072     return levenshtein
       
  1073 
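         # A minimal sketch, assuming an instance `dmp`; a paired insertion and
         # deletion counts once (as a substitution), unpaired edits count in full:
         #
         #   dmp.diff_levenshtein([(-1, "abc"), (1, "1234"), (0, "xyz")])   # -> 4
         #   dmp.diff_levenshtein([(-1, "abc"), (0, "xyz"), (1, "1234")])   # -> 7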
       
  1074   def diff_toDelta(self, diffs):
       
  1075     """Crush the diff into an encoded string which describes the operations
       
  1076     required to transform text1 into text2.
       
  1077     E.g. =3\t-2\t+ing  -> Keep 3 chars, delete 2 chars, insert 'ing'.
       
  1078     Operations are tab-separated.  Inserted text is escaped using %xx notation.
       
  1079 
       
  1080     Args:
       
  1081       diffs: Array of diff tuples.
       
  1082 
       
  1083     Returns:
       
  1084       Delta text.
       
  1085     """
       
  1086     text = []
       
  1087     for (op, data) in diffs:
       
  1088       if op == self.DIFF_INSERT:
       
  1089         # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
       
  1090         data = data.encode("utf-8")
       
  1091         text.append("+" + urllib.quote(data, "!~*'();/?:@&=+$,# "))
       
  1092       elif op == self.DIFF_DELETE:
       
  1093         text.append("-%d" % len(data))
       
  1094       elif op == self.DIFF_EQUAL:
       
  1095         text.append("=%d" % len(data))
       
  1096     return "\t".join(text)
       
  1097 
       
  1098   def diff_fromDelta(self, text1, delta):
       
  1099     """Given the original text1, and an encoded string which describes the
       
  1100     operations required to transform text1 into text2, compute the full diff.
       
  1101 
       
  1102     Args:
       
  1103       text1: Source string for the diff.
       
  1104       delta: Delta text.
       
  1105 
       
  1106     Returns:
       
  1107       Array of diff tuples.
       
  1108 
       
  1109     Raises:
       
  1110       ValueError: If invalid input.
       
  1111     """
       
  1112     if type(delta) == unicode:
       
  1113       # Deltas should be composed of a subset of ascii chars, Unicode not
       
  1114       # required.  If this encode raises UnicodeEncodeError, delta is invalid.
       
  1115       delta = delta.encode("ascii")
       
  1116     diffs = []
       
  1117     pointer = 0  # Cursor in text1
       
  1118     tokens = delta.split("\t")
       
  1119     for token in tokens:
       
  1120       if token == "":
       
  1121         # Blank tokens are ok (from a trailing \t).
       
  1122         continue
       
  1123       # Each token begins with a one character parameter which specifies the
       
  1124       # operation of this token (delete, insert, equality).
       
  1125       param = token[1:]
       
  1126       if token[0] == "+":
       
  1127         param = urllib.unquote(param).decode("utf-8")
       
  1128         diffs.append((self.DIFF_INSERT, param))
       
  1129       elif token[0] == "-" or token[0] == "=":
       
  1130         try:
       
  1131           n = int(param)
       
  1132         except ValueError:
       
  1133           raise ValueError, "Invalid number in diff_fromDelta: " + param
       
  1134         if n < 0:
       
  1135           raise ValueError, "Negative number in diff_fromDelta: " + param
       
  1136         text = text1[pointer : pointer + n]
       
  1137         pointer += n
       
  1138         if token[0] == "=":
       
  1139           diffs.append((self.DIFF_EQUAL, text))
       
  1140         else:
       
  1141           diffs.append((self.DIFF_DELETE, text))
       
  1142       else:
       
  1143         # Anything else is an error.
       
  1144         raise ValueError, ("Invalid diff operation in diff_fromDelta: " +
       
  1145             token[0])
       
  1146     if pointer != len(text1):
       
  1147       raise ValueError, (
       
  1148           "Delta length (%d) does not equal source text length (%d)." %
       
  1149           (pointer, len(text1)))
       
  1150     return diffs
       
  1151 
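  # Illustrative round trip for diff_fromDelta (assumes dmp is a
  # diff_match_patch instance; continues the diff_toDelta sketch above):
  #
  #   dmp.diff_fromDelta("jumps", "=4\t-1\t+ed")
  #   # -> [(DIFF_EQUAL, "jump"), (DIFF_DELETE, "s"), (DIFF_INSERT, "ed")],
  #   #    i.e. the original diff, with the kept and deleted text re-read
  #   #    from the source string "jumps".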
       
  1152   #  MATCH FUNCTIONS
       
  1153 
       
  1154   def match_main(self, text, pattern, loc):
       
  1155     """Locate the best instance of 'pattern' in 'text' near 'loc'.
       
  1156 
       
  1157     Args:
       
  1158       text: The text to search.
       
  1159       pattern: The pattern to search for.
       
  1160       loc: The location to search around.
       
  1161 
       
  1162     Returns:
       
  1163       Best match index or -1.
       
  1164     """
       
  1165     loc = max(0, min(loc, len(text)))
       
  1166     if text == pattern:
       
  1167       # Shortcut (potentially not guaranteed by the algorithm)
       
  1168       return 0
       
  1169     elif not text:
       
  1170       # Nothing to match.
       
  1171       return -1
       
  1172     elif text[loc:loc + len(pattern)] == pattern:
       
  1173       # Perfect match at the perfect spot!  (Includes case of null pattern)
       
  1174       return loc
       
  1175     else:
       
  1176       # Do a fuzzy compare.
       
  1177       match = self.match_bitap(text, pattern, loc)
       
  1178       return match
       
  1179 
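  # Illustrative usage sketch for match_main (assumes dmp is a
  # diff_match_patch instance with default settings):
  #
  #   dmp.match_main("The quick brown fox", "quick", 4)
  #   # -> 4: exact match at the expected location.
  #   dmp.match_main("The quick brown fox", "quikc", 4)
  #   # Falls through to match_bitap; with the default Match_Threshold of 0.5
  #   # this should still report a match near index 4 rather than -1.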
       
  1180   def match_bitap(self, text, pattern, loc):
       
  1181     """Locate the best instance of 'pattern' in 'text' near 'loc' using the
       
  1182     Bitap algorithm.
       
  1183 
       
  1184     Args:
       
  1185       text: The text to search.
       
  1186       pattern: The pattern to search for.
       
  1187       loc: The location to search around.
       
  1188 
       
  1189     Returns:
       
  1190       Best match index or -1.
       
  1191     """
       
  1192     # Python doesn't have a maxint limit, so ignore this check.
       
  1193     #if self.Match_MaxBits != 0 and len(pattern) > self.Match_MaxBits:
       
  1194     #  raise ValueError("Pattern too long for this application.")
       
  1195 
       
  1196     # Initialise the alphabet.
       
  1197     s = self.match_alphabet(pattern)
       
  1198 
       
  1199     def match_bitapScore(e, x):
       
  1200       """Compute and return the score for a match with e errors at location x.
       
  1201       Accesses loc and pattern through the enclosing scope (closure).
       
  1202 
       
  1203       Args:
       
  1204         e: Number of errors in match.
       
  1205         x: Location of match.
       
  1206 
       
  1207       Returns:
       
  1208         Overall score for match (0.0 = good, 1.0 = bad).
       
  1209       """
       
  1210       accuracy = float(e) / len(pattern)
       
  1211       proximity = abs(loc - x)
       
  1212       if not self.Match_Distance:
       
  1213         # Dodge divide by zero error.
       
  1214         return proximity and 1.0 or accuracy
       
  1215       return accuracy + (proximity / float(self.Match_Distance))
       
  1216 
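    # Worked example of the score above (default Match_Distance = 1000): a
    # match with e = 2 errors against a 10-character pattern, found 40
    # characters away from loc, scores 2/10 + 40/1000 = 0.24, which passes
    # the default Match_Threshold of 0.5.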
       
  1217     # Highest score beyond which we give up.
       
  1218     score_threshold = self.Match_Threshold
       
  1219     # Is there a nearby exact match? (speedup)
       
  1220     best_loc = text.find(pattern, loc)
       
  1221     if best_loc != -1:
       
  1222       score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
       
  1223       # What about in the other direction? (speedup)
       
  1224       best_loc = text.rfind(pattern, loc + len(pattern))
       
  1225       if best_loc != -1:
       
  1226         score_threshold = min(match_bitapScore(0, best_loc), score_threshold)
       
  1227 
       
  1228     # Initialise the bit arrays.
       
  1229     matchmask = 1 << (len(pattern) - 1)
       
  1230     best_loc = -1
       
  1231 
       
  1232     bin_max = len(pattern) + len(text)
       
  1233     # Empty initialization added to appease pychecker.
       
  1234     last_rd = None
       
  1235     for d in xrange(len(pattern)):
       
  1236       # Scan for the best match; each iteration allows for one more error.
       
  1237       # Run a binary search to determine how far from 'loc' we can stray at
       
  1238       # this error level.
       
  1239       bin_min = 0
       
  1240       bin_mid = bin_max
       
  1241       while bin_min < bin_mid:
       
  1242         if match_bitapScore(d, loc + bin_mid) <= score_threshold:
       
  1243           bin_min = bin_mid
       
  1244         else:
       
  1245           bin_max = bin_mid
       
  1246         bin_mid = (bin_max - bin_min) / 2 + bin_min
       
  1247 
       
  1248       # Use the result from this iteration as the maximum for the next.
       
  1249       bin_max = bin_mid
       
  1250       start = max(1, loc - bin_mid + 1)
       
  1251       finish = min(loc + bin_mid, len(text)) + len(pattern)
       
  1252 
       
  1253       rd = range(finish + 1)
       
  1254       rd.append((1 << d) - 1)
       
  1255       for j in xrange(finish, start - 1, -1):
       
  1256         if len(text) <= j - 1:
       
  1257           # Out of range.
       
  1258           charMatch = 0
       
  1259         else:
       
  1260           charMatch = s.get(text[j - 1], 0)
       
  1261         if d == 0:  # First pass: exact match.
       
  1262           rd[j] = ((rd[j + 1] << 1) | 1) & charMatch
       
  1263         else:  # Subsequent passes: fuzzy match.
       
  1264           rd[j] = ((rd[j + 1] << 1) | 1) & charMatch | (
       
  1265               ((last_rd[j + 1] | last_rd[j]) << 1) | 1) | last_rd[j + 1]
       
  1266         if rd[j] & matchmask:
       
  1267           score = match_bitapScore(d, j - 1)
       
  1268           # This match will almost certainly be better than any existing match.
       
  1269           # But check anyway.
       
  1270           if score <= score_threshold:
       
  1271             # Told you so.
       
  1272             score_threshold = score
       
  1273             best_loc = j - 1
       
  1274             if best_loc > loc:
       
  1275               # When passing loc, don't exceed our current distance from loc.
       
  1276               start = max(1, 2 * loc - best_loc)
       
  1277             else:
       
  1278               # Already passed loc, downhill from here on in.
       
  1279               break
       
  1280       # No hope for a (better) match at greater error levels.
       
  1281       if match_bitapScore(d + 1, loc) > score_threshold:
       
  1282         break
       
  1283       last_rd = rd
       
  1284     return best_loc
       
  1285 
       
  1286   def match_alphabet(self, pattern):
       
  1287     """Initialise the alphabet for the Bitap algorithm.
       
  1288 
       
  1289     Args:
       
  1290       pattern: The text to encode.
       
  1291 
       
  1292     Returns:
       
  1293       Hash of character locations.
       
  1294     """
       
  1295     s = {}
       
  1296     for char in pattern:
       
  1297       s[char] = 0
       
  1298     for i in xrange(len(pattern)):
       
  1299       s[pattern[i]] |= 1 << (len(pattern) - i - 1)
       
  1300     return s
       
  1301 
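  # Illustrative result of match_alphabet (worked out from the loop above,
  # assuming dmp is a diff_match_patch instance):
  #
  #   dmp.match_alphabet("aba")
  #   # -> {'a': 5, 'b': 2}: 'a' appears at pattern positions 0 and 2
  #   #    (bitmask 0b101), 'b' at position 1 (bitmask 0b010).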
       
  1302   #  PATCH FUNCTIONS
       
  1303 
       
  1304   def patch_addContext(self, patch, text):
       
  1305     """Increase the context until it is unique,
       
  1306     but don't let the pattern expand beyond Match_MaxBits.
       
  1307 
       
  1308     Args:
       
  1309       patch: The patch to grow.
       
  1310       text: Source text.
       
  1311     """
       
  1312     if len(text) == 0:
       
  1313       return
       
  1314     pattern = text[patch.start2 : patch.start2 + patch.length1]
       
  1315     padding = 0
       
  1316 
       
  1317     # Look for the first and last matches of pattern in text.  If two different
       
  1318     # matches are found, increase the pattern length.
       
  1319     while (text.find(pattern) != text.rfind(pattern) and (self.Match_MaxBits ==
       
  1320         0 or len(pattern) < self.Match_MaxBits - self.Patch_Margin -
       
  1321         self.Patch_Margin)):
       
  1322       padding += self.Patch_Margin
       
  1323       pattern = text[max(0, patch.start2 - padding) :
       
  1324                      patch.start2 + patch.length1 + padding]
       
  1325     # Add one chunk for good luck.
       
  1326     padding += self.Patch_Margin
       
  1327 
       
  1328     # Add the prefix.
       
  1329     prefix = text[max(0, patch.start2 - padding) : patch.start2]
       
  1330     if prefix:
       
  1331       patch.diffs[:0] = [(self.DIFF_EQUAL, prefix)]
       
  1332     # Add the suffix.
       
  1333     suffix = text[patch.start2 + patch.length1 :
       
  1334                   patch.start2 + patch.length1 + padding]
       
  1335     if suffix:
       
  1336       patch.diffs.append((self.DIFF_EQUAL, suffix))
       
  1337 
       
  1338     # Roll back the start points.
       
  1339     patch.start1 -= len(prefix)
       
  1340     patch.start2 -= len(prefix)
       
  1341     # Extend lengths.
       
  1342     patch.length1 += len(prefix) + len(suffix)
       
  1343     patch.length2 += len(prefix) + len(suffix)
       
  1344 
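  # Sizing note (worked out from the defaults): with Match_MaxBits = 32 and
  # Patch_Margin = 4, the uniqueness loop above stops growing the pattern once
  # it would reach 32 - 4 - 4 = 24 characters; one extra Patch_Margin is then
  # added to the padding before the surrounding equal text is folded into the
  # patch as DIFF_EQUAL prefix and suffix context.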
       
  1345   def patch_make(self, a, b=None, c=None):
       
  1346     """Compute a list of patches to turn text1 into text2.
       
  1347     Use diffs if provided, otherwise compute them ourselves.
       
  1348     There are four ways to call this function, depending on what data is
       
  1349     available to the caller:
       
  1350     Method 1:
       
  1351     a = text1, b = text2
       
  1352     Method 2:
       
  1353     a = diffs
       
  1354     Method 3 (optimal):
       
  1355     a = text1, b = diffs
       
  1356     Method 4 (deprecated, use method 3):
       
  1357     a = text1, b = text2, c = diffs
       
  1358 
       
  1359     Args:
       
  1360       a: text1 (methods 1,3,4) or Array of diff tuples for text1 to
       
  1361           text2 (method 2).
       
  1362       b: text2 (methods 1,4) or Array of diff tuples for text1 to
       
  1363           text2 (method 3) or None (method 2).
       
  1364       c: Array of diff tuples for text1 to text2 (method 4) or
       
  1365           None (methods 1,2,3).
       
  1366 
       
  1367     Returns:
       
  1368       Array of patch objects.
       
  1369     """
       
  1370     text1 = None
       
  1371     diffs = None
       
  1372     # Note that texts may arrive as 'str' or 'unicode'.
       
  1373     if isinstance(a, basestring) and isinstance(b, basestring) and c is None:
       
  1374       # Method 1: text1, text2
       
  1375       # Compute diffs from text1 and text2.
       
  1376       text1 = a
       
  1377       diffs = self.diff_main(text1, b, True)
       
  1378       if len(diffs) > 2:
       
  1379         self.diff_cleanupSemantic(diffs)
       
  1380         self.diff_cleanupEfficiency(diffs)
       
  1381     elif isinstance(a, list) and b is None and c is None:
       
  1382       # Method 2: diffs
       
  1383       # Compute text1 from diffs.
       
  1384       diffs = a
       
  1385       text1 = self.diff_text1(diffs)
       
  1386     elif isinstance(a, basestring) and isinstance(b, list) and c is None:
       
  1387       # Method 3: text1, diffs
       
  1388       text1 = a
       
  1389       diffs = b
       
  1390     elif (isinstance(a, basestring) and isinstance(b, basestring) and
       
  1391           isinstance(c, list)):
       
  1392       # Method 4: text1, text2, diffs
       
  1393       # text2 is not used.
       
  1394       text1 = a
       
  1395       diffs = c
       
  1396     else:
       
  1397       raise ValueError("Unknown call format to patch_make.")
       
  1398 
       
  1399     if not diffs:
       
  1400       return []  # Get rid of the None case.
       
  1401     patches = []
       
  1402     patch = patch_obj()
       
  1403     char_count1 = 0  # Number of characters into the text1 string.
       
  1404     char_count2 = 0  # Number of characters into the text2 string.
       
  1405     prepatch_text = text1  # Recreate the patches to determine context info.
       
  1406     postpatch_text = text1
       
  1407     for x in xrange(len(diffs)):
       
  1408       (diff_type, diff_text) = diffs[x]
       
  1409       if len(patch.diffs) == 0 and diff_type != self.DIFF_EQUAL:
       
  1410         # A new patch starts here.
       
  1411         patch.start1 = char_count1
       
  1412         patch.start2 = char_count2
       
  1413       if diff_type == self.DIFF_INSERT:
       
  1414         # Insertion
       
  1415         patch.diffs.append(diffs[x])
       
  1416         patch.length2 += len(diff_text)
       
  1417         postpatch_text = (postpatch_text[:char_count2] + diff_text +
       
  1418                           postpatch_text[char_count2:])
       
  1419       elif diff_type == self.DIFF_DELETE:
       
  1420         # Deletion.
       
  1421         patch.length1 += len(diff_text)
       
  1422         patch.diffs.append(diffs[x])
       
  1423         postpatch_text = (postpatch_text[:char_count2] +
       
  1424                           postpatch_text[char_count2 + len(diff_text):])
       
  1425       elif (diff_type == self.DIFF_EQUAL and
       
  1426             len(diff_text) <= 2 * self.Patch_Margin and
       
  1427             len(patch.diffs) != 0 and len(diffs) != x + 1):
       
  1428         # Small equality inside a patch.
       
  1429         patch.diffs.append(diffs[x])
       
  1430         patch.length1 += len(diff_text)
       
  1431         patch.length2 += len(diff_text)
       
  1432 
       
  1433       if (diff_type == self.DIFF_EQUAL and
       
  1434           len(diff_text) >= 2 * self.Patch_Margin):
       
  1435         # Time for a new patch.
       
  1436         if len(patch.diffs) != 0:
       
  1437           self.patch_addContext(patch, prepatch_text)
       
  1438           patches.append(patch)
       
  1439           patch = patch_obj()
       
  1440           # Unlike Unidiff, our patch lists have a rolling context.
       
  1441           # http://code.google.com/p/google-diff-match-patch/wiki/Unidiff
       
  1442           # Update prepatch text & pos to reflect the application of the
       
  1443           # just completed patch.
       
  1444           prepatch_text = postpatch_text
       
  1445           char_count1 = char_count2
       
  1446 
       
  1447       # Update the current character count.
       
  1448       if diff_type != self.DIFF_INSERT:
       
  1449         char_count1 += len(diff_text)
       
  1450       if diff_type != self.DIFF_DELETE:
       
  1451         char_count2 += len(diff_text)
       
  1452 
       
  1453     # Pick up the leftover patch if not empty.
       
  1454     if len(patch.diffs) != 0:
       
  1455       self.patch_addContext(patch, prepatch_text)
       
  1456       patches.append(patch)
       
  1457     return patches
       
  1458 
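  # Illustrative usage sketch for patch_make (assumes dmp is a
  # diff_match_patch instance; method 1 shown first, method 3 is optimal):
  #
  #   patches = dmp.patch_make("The quick brown fox.",
  #                            "The very quick brown fox.")
  #   # Method 3 avoids re-diffing when the caller already has the diffs:
  #   #   diffs = dmp.diff_main(text1, text2, True)
  #   #   patches = dmp.patch_make(text1, diffs)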
       
  1459   def patch_deepCopy(self, patches):
       
  1460     """Given an array of patches, return another array that is identical.
       
  1461 
       
  1462     Args:
       
  1463       patches: Array of patch objects.
       
  1464 
       
  1465     Returns:
       
  1466       Array of patch objects.
       
  1467     """
       
  1468     patchesCopy = []
       
  1469     for patch in patches:
       
  1470       patchCopy = patch_obj()
       
  1471       # No need to deep copy the tuples since they are immutable.
       
  1472       patchCopy.diffs = patch.diffs[:]
       
  1473       patchCopy.start1 = patch.start1
       
  1474       patchCopy.start2 = patch.start2
       
  1475       patchCopy.length1 = patch.length1
       
  1476       patchCopy.length2 = patch.length2
       
  1477       patchesCopy.append(patchCopy)
       
  1478     return patchesCopy
       
  1479 
       
  1480   def patch_apply(self, patches, text):
       
  1481     """Merge a set of patches onto the text.  Return a patched text, as well
       
  1482     as a list of true/false values indicating which patches were applied.
       
  1483 
       
  1484     Args:
       
  1485       patches: Array of patch objects.
       
  1486       text: Old text.
       
  1487 
       
  1488     Returns:
       
  1489       Two element Array, containing the new text and an array of boolean values.
       
  1490     """
       
  1491     if not patches:
       
  1492       return (text, [])
       
  1493 
       
  1494     # Deep copy the patches so that no changes are made to originals.
       
  1495     patches = self.patch_deepCopy(patches)
       
  1496 
       
  1497     nullPadding = self.patch_addPadding(patches)
       
  1498     text = nullPadding + text + nullPadding
       
  1499     self.patch_splitMax(patches)
       
  1500 
       
  1501     # delta keeps track of the offset between the expected and actual location
       
  1502     # of the previous patch.  If there are patches expected at positions 10 and
       
  1503     # 20, but the first patch was found at 12, delta is 2 and the second patch
       
  1504     # has an effective expected position of 22.
       
  1505     delta = 0
       
  1506     results = []
       
  1507     for patch in patches:
       
  1508       expected_loc = patch.start2 + delta
       
  1509       text1 = self.diff_text1(patch.diffs)
       
  1510       end_loc = -1
       
  1511       if len(text1) > self.Match_MaxBits:
       
  1512         # patch_splitMax will only provide an oversized pattern in the case of
       
  1513         # a monster delete.
       
  1514         start_loc = self.match_main(text, text1[:self.Match_MaxBits],
       
  1515                                     expected_loc)
       
  1516         if start_loc != -1:
       
  1517           end_loc = self.match_main(text, text1[-self.Match_MaxBits:],
       
  1518               expected_loc + len(text1) - self.Match_MaxBits)
       
  1519           if end_loc == -1 or start_loc >= end_loc:
       
  1520             # Can't find valid trailing context.  Drop this patch.
       
  1521             start_loc = -1
       
  1522       else:
       
  1523         start_loc = self.match_main(text, text1, expected_loc)
       
  1524       if start_loc == -1:
       
  1525         # No match found.  :(
       
  1526         results.append(False)
       
  1527         # Subtract the delta for this failed patch from subsequent patches.
       
  1528         delta -= patch.length2 - patch.length1
       
  1529       else:
       
  1530         # Found a match.  :)
       
  1531         results.append(True)
       
  1532         delta = start_loc - expected_loc
       
  1533         if end_loc == -1:
       
  1534           text2 = text[start_loc : start_loc + len(text1)]
       
  1535         else:
       
  1536           text2 = text[start_loc : end_loc + self.Match_MaxBits]
       
  1537         if text1 == text2:
       
  1538           # Perfect match, just shove the replacement text in.
       
  1539           text = (text[:start_loc] + self.diff_text2(patch.diffs) +
       
  1540                       text[start_loc + len(text1):])
       
  1541         else:
       
  1542           # Imperfect match.
       
  1543           # Run a diff to get a framework of equivalent indices.
       
  1544           diffs = self.diff_main(text1, text2, False)
       
  1545           if (len(text1) > self.Match_MaxBits and
       
  1546               self.diff_levenshtein(diffs) / float(len(text1)) >
       
  1547               self.Patch_DeleteThreshold):
       
  1548             # The end points match, but the content is unacceptably bad.
       
  1549             results[-1] = False
       
  1550           else:
       
  1551             self.diff_cleanupSemanticLossless(diffs)
       
  1552             index1 = 0
       
  1553             for (op, data) in patch.diffs:
       
  1554               if op != self.DIFF_EQUAL:
       
  1555                 index2 = self.diff_xIndex(diffs, index1)
       
  1556               if op == self.DIFF_INSERT:  # Insertion
       
  1557                 text = text[:start_loc + index2] + data + text[start_loc +
       
  1558                                                                index2:]
       
  1559               elif op == self.DIFF_DELETE:  # Deletion
       
  1560                 text = text[:start_loc + index2] + text[start_loc +
       
  1561                     self.diff_xIndex(diffs, index1 + len(data)):]
       
  1562               if op != self.DIFF_DELETE:
       
  1563                 index1 += len(data)
       
  1564     # Strip the padding off.
       
  1565     text = text[len(nullPadding):-len(nullPadding)]
       
  1566     return (text, results)
       
  1567 
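  # Illustrative usage sketch for patch_apply (assumes dmp is a
  # diff_match_patch instance and patches came from the patch_make sketch):
  #
  #   new_text, results = dmp.patch_apply(patches, "The quick brown fox.")
  #   # new_text -> "The very quick brown fox." if every hunk matched;
  #   # results  -> one True/False per patch, e.g. [True].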
       
  1568   def patch_addPadding(self, patches):
       
  1569     """Add some padding on text start and end so that edges can match
       
  1570     something.  Intended to be called only from within patch_apply.
       
  1571 
       
  1572     Args:
       
  1573       patches: Array of patch objects.
       
  1574 
       
  1575     Returns:
       
  1576       The padding string added to each side.
       
  1577     """
       
  1578     paddingLength = self.Patch_Margin
       
  1579     nullPadding = ""
       
  1580     for x in xrange(1, paddingLength + 1):
       
  1581       nullPadding += chr(x)
       
  1582 
       
  1583     # Bump all the patches forward.
       
  1584     for patch in patches:
       
  1585       patch.start1 += paddingLength
       
  1586       patch.start2 += paddingLength
       
  1587 
       
  1588     # Add some padding on start of first diff.
       
  1589     patch = patches[0]
       
  1590     diffs = patch.diffs
       
  1591     if not diffs or diffs[0][0] != self.DIFF_EQUAL:
       
  1592       # Add nullPadding equality.
       
  1593       diffs.insert(0, (self.DIFF_EQUAL, nullPadding))
       
  1594       patch.start1 -= paddingLength  # Should be 0.
       
  1595       patch.start2 -= paddingLength  # Should be 0.
       
  1596       patch.length1 += paddingLength
       
  1597       patch.length2 += paddingLength
       
  1598     elif paddingLength > len(diffs[0][1]):
       
  1599       # Grow first equality.
       
  1600       extraLength = paddingLength - len(diffs[0][1])
       
  1601       newText = nullPadding[len(diffs[0][1]):] + diffs[0][1]
       
  1602       diffs[0] = (diffs[0][0], newText)
       
  1603       patch.start1 -= extraLength
       
  1604       patch.start2 -= extraLength
       
  1605       patch.length1 += extraLength
       
  1606       patch.length2 += extraLength
       
  1607 
       
  1608     # Add some padding on end of last diff.
       
  1609     patch = patches[-1]
       
  1610     diffs = patch.diffs
       
  1611     if not diffs or diffs[-1][0] != self.DIFF_EQUAL:
       
  1612       # Add nullPadding equality.
       
  1613       diffs.append((self.DIFF_EQUAL, nullPadding))
       
  1614       patch.length1 += paddingLength
       
  1615       patch.length2 += paddingLength
       
  1616     elif paddingLength > len(diffs[-1][1]):
       
  1617       # Grow last equality.
       
  1618       extraLength = paddingLength - len(diffs[-1][1])
       
  1619       newText = diffs[-1][1] + nullPadding[:extraLength]
       
  1620       diffs[-1] = (diffs[-1][0], newText)
       
  1621       patch.length1 += extraLength
       
  1622       patch.length2 += extraLength
       
  1623 
       
  1624     return nullPadding
       
  1625 
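  # Note on the padding characters (worked out from the loop above): with the
  # default Patch_Margin of 4 the padding string is "\x01\x02\x03\x04".  These
  # low control characters are unlikely to occur in real text, and the padding
  # is stripped off again at the end of patch_apply.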
       
  1626   def patch_splitMax(self, patches):
       
  1627     """Look through the patches and break up any which are longer than the
       
  1628     maximum limit of the match algorithm.
       
  1629 
       
  1630     Args:
       
  1631       patches: Array of patch objects.
       
  1632     """
       
  1633     if self.Match_MaxBits == 0:
       
  1634       return
       
  1635     for x in xrange(len(patches)):
       
  1636       if patches[x].length1 > self.Match_MaxBits:
       
  1637         bigpatch = patches[x]
       
  1638         # Remove the big old patch.
       
  1639         del patches[x]
       
  1640         x -= 1
       
  1641         patch_size = self.Match_MaxBits
       
  1642         start1 = bigpatch.start1
       
  1643         start2 = bigpatch.start2
       
  1644         precontext = ''
       
  1645         while len(bigpatch.diffs) != 0:
       
  1646           # Create one of several smaller patches.
       
  1647           patch = patch_obj()
       
  1648           empty = True
       
  1649           patch.start1 = start1 - len(precontext)
       
  1650           patch.start2 = start2 - len(precontext)
       
  1651           if precontext:
       
  1652             patch.length1 = patch.length2 = len(precontext)
       
  1653             patch.diffs.append((self.DIFF_EQUAL, precontext))
       
  1654 
       
  1655           while (len(bigpatch.diffs) != 0 and
       
  1656                  patch.length1 < patch_size - self.Patch_Margin):
       
  1657             (diff_type, diff_text) = bigpatch.diffs[0]
       
  1658             if diff_type == self.DIFF_INSERT:
       
  1659               # Insertions are harmless.
       
  1660               patch.length2 += len(diff_text)
       
  1661               start2 += len(diff_text)
       
  1662               patch.diffs.append(bigpatch.diffs.pop(0))
       
  1663               empty = False
       
  1664             elif (diff_type == self.DIFF_DELETE and len(patch.diffs) == 1 and
       
  1665                 patch.diffs[0][0] == self.DIFF_EQUAL and
       
  1666                 len(diff_text) > 2 * patch_size):
       
  1667               # This is a large deletion.  Let it pass in one chunk.
       
  1668               patch.length1 += len(diff_text)
       
  1669               start1 += len(diff_text)
       
  1670               empty = False
       
  1671               patch.diffs.append((diff_type, diff_text))
       
  1672               del bigpatch.diffs[0]
       
  1673             else:
       
  1674               # Deletion or equality.  Only take as much as we can stomach.
       
  1675               diff_text = diff_text[:patch_size - patch.length1 -
       
  1676                                     self.Patch_Margin]
       
  1677               patch.length1 += len(diff_text)
       
  1678               start1 += len(diff_text)
       
  1679               if diff_type == self.DIFF_EQUAL:
       
  1680                 patch.length2 += len(diff_text)
       
  1681                 start2 += len(diff_text)
       
  1682               else:
       
  1683                 empty = False
       
  1684 
       
  1685               patch.diffs.append((diff_type, diff_text))
       
  1686               if diff_text == bigpatch.diffs[0][1]:
       
  1687                 del bigpatch.diffs[0]
       
  1688               else:
       
  1689                 bigpatch.diffs[0] = (bigpatch.diffs[0][0],
       
  1690                                      bigpatch.diffs[0][1][len(diff_text):])
       
  1691 
       
  1692           # Compute the head context for the next patch.
       
  1693           precontext = self.diff_text2(patch.diffs)
       
  1694           precontext = precontext[-self.Patch_Margin:]
       
  1695           # Append the end context for this patch.
       
  1696           postcontext = self.diff_text1(bigpatch.diffs)[:self.Patch_Margin]
       
  1697           if postcontext:
       
  1698             patch.length1 += len(postcontext)
       
  1699             patch.length2 += len(postcontext)
       
  1700             if len(patch.diffs) != 0 and patch.diffs[-1][0] == self.DIFF_EQUAL:
       
  1701               patch.diffs[-1] = (self.DIFF_EQUAL, patch.diffs[-1][1] +
       
  1702                                  postcontext)
       
  1703             else:
       
  1704               patch.diffs.append((self.DIFF_EQUAL, postcontext))
       
  1705 
       
  1706           if not empty:
       
  1707             x += 1
       
  1708             patches.insert(x, patch)
       
  1709 
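  # Sizing note (worked out from the defaults): with Match_MaxBits = 32 and
  # Patch_Margin = 4, each sub-patch built above stops accepting diffs once
  # its length1 reaches 32 - 4 = 28, leaving room for the trailing context, so
  # the split patches stay within the bitap pattern limit.  Large deletions
  # are deliberately allowed through in a single chunk.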
       
  1710   def patch_toText(self, patches):
       
  1711     """Take a list of patches and return a textual representation.
       
  1712 
       
  1713     Args:
       
  1714       patches: Array of patch objects.
       
  1715 
       
  1716     Returns:
       
  1717       Text representation of patches.
       
  1718     """
       
  1719     text = []
       
  1720     for patch in patches:
       
  1721       text.append(str(patch))
       
  1722     return "".join(text)
       
  1723 
       
  1724   def patch_fromText(self, textline):
       
  1725     """Parse a textual representation of patches and return a list of patch
       
  1726     objects.
       
  1727 
       
  1728     Args:
       
  1729       textline: Text representation of patches.
       
  1730 
       
  1731     Returns:
       
  1732       Array of patch objects.
       
  1733 
       
  1734     Raises:
       
  1735       ValueError: If invalid input.
       
  1736     """
       
  1737     if type(textline) == unicode:
       
  1738       # Patches should be composed of a subset of ascii chars, Unicode not
       
  1739       # required.  If this encode raises UnicodeEncodeError, patch is invalid.
       
  1740       textline = textline.encode("ascii")
       
  1741     patches = []
       
  1742     if not textline:
       
  1743       return patches
       
  1744     text = textline.split('\n')
       
  1745     while len(text) != 0:
       
  1746       m = re.match("^@@ -(\d+),?(\d*) \+(\d+),?(\d*) @@$", text[0])
       
  1747       if not m:
       
  1748         raise ValueError, "Invalid patch string: " + text[0]
       
  1749       patch = patch_obj()
       
  1750       patches.append(patch)
       
  1751       patch.start1 = int(m.group(1))
       
  1752       if m.group(2) == '':
       
  1753         patch.start1 -= 1
       
  1754         patch.length1 = 1
       
  1755       elif m.group(2) == '0':
       
  1756         patch.length1 = 0
       
  1757       else:
       
  1758         patch.start1 -= 1
       
  1759         patch.length1 = int(m.group(2))
       
  1760 
       
  1761       patch.start2 = int(m.group(3))
       
  1762       if m.group(4) == '':
       
  1763         patch.start2 -= 1
       
  1764         patch.length2 = 1
       
  1765       elif m.group(4) == '0':
       
  1766         patch.length2 = 0
       
  1767       else:
       
  1768         patch.start2 -= 1
       
  1769         patch.length2 = int(m.group(4))
       
  1770 
       
  1771       del text[0]
       
  1772 
       
  1773       while len(text) != 0:
       
  1774         if text[0]:
       
  1775           sign = text[0][0]
       
  1776         else:
       
  1777           sign = ''
       
  1778         line = urllib.unquote(text[0][1:])
       
  1779         line = line.decode("utf-8")
       
  1780         if sign == '+':
       
  1781           # Insertion.
       
  1782           patch.diffs.append((self.DIFF_INSERT, line))
       
  1783         elif sign == '-':
       
  1784           # Deletion.
       
  1785           patch.diffs.append((self.DIFF_DELETE, line))
       
  1786         elif sign == ' ':
       
  1787           # Minor equality.
       
  1788           patch.diffs.append((self.DIFF_EQUAL, line))
       
  1789         elif sign == '@':
       
  1790           # Start of next patch.
       
  1791           break
       
  1792         elif sign == '':
       
  1793           # Blank line; ignore it.
       
  1794           pass
       
  1795         else:
       
  1796           # Anything else is an error.
       
  1797           raise ValueError, "Invalid patch mode: '%s'\n%s" % (sign, line)
       
  1798         del text[0]
       
  1799     return patches
       
  1800 
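  # Illustrative round trip between patch_toText and patch_fromText (assumes
  # dmp is a diff_match_patch instance and patches is a list of patch_obj):
  #
  #   text = dmp.patch_toText(patches)      # "@@ -m,n +p,q @@" headers plus
  #                                         # '+', '-' and ' ' body lines
  #   restored = dmp.patch_fromText(text)   # parses back to patch_obj list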
       
  1801 
       
  1802 class patch_obj:
       
  1803   """Class representing one patch operation.
       
  1804   """
       
  1805 
       
  1806   def __init__(self):
       
  1807     """Initializes with an empty list of diffs.
       
  1808     """
       
  1809     self.diffs = []
       
  1810     self.start1 = None
       
  1811     self.start2 = None
       
  1812     self.length1 = 0
       
  1813     self.length2 = 0
       
  1814 
       
  1815   def __str__(self):
       
  1816     """Emulate GNU diff's format.
       
  1817     Header: @@ -382,8 +481,9 @@
       
  1818     Indices are printed as 1-based, not 0-based.
       
  1819 
       
  1820     Returns:
       
  1821       The GNU diff string.
       
  1822     """
       
  1823     if self.length1 == 0:
       
  1824       coords1 = str(self.start1) + ",0"
       
  1825     elif self.length1 == 1:
       
  1826       coords1 = str(self.start1 + 1)
       
  1827     else:
       
  1828       coords1 = str(self.start1 + 1) + "," + str(self.length1)
       
  1829     if self.length2 == 0:
       
  1830       coords2 = str(self.start2) + ",0"
       
  1831     elif self.length2 == 1:
       
  1832       coords2 = str(self.start2 + 1)
       
  1833     else:
       
  1834       coords2 = str(self.start2 + 1) + "," + str(self.length2)
       
  1835     text = ["@@ -", coords1, " +", coords2, " @@\n"]
       
  1836     # Escape the body of the patch with %xx notation.
       
  1837     for (op, data) in self.diffs:
       
  1838       if op == diff_match_patch.DIFF_INSERT:
       
  1839         text.append("+")
       
  1840       elif op == diff_match_patch.DIFF_DELETE:
       
  1841         text.append("-")
       
  1842       elif op == diff_match_patch.DIFF_EQUAL:
       
  1843         text.append(" ")
       
  1844       # High ascii will raise UnicodeDecodeError.  Use Unicode instead.
       
  1845       data = data.encode("utf-8")
       
  1846       text.append(urllib.quote(data, "!~*'();/?:@&=+$,# ") + "\n")
       
  1847     return "".join(text)