1 // Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
2 // https://github.com/sergi/go-diff
3 // See the included LICENSE file for license details.
5 // go-diff is a Go implementation of Google's Diff, Match, and Patch library
6 // Original library is Copyright (c) 2006 Google Inc.
7 // http://code.google.com/p/google-diff-match-patch/
25 // Operation defines the operation of a diff item.
28 //go:generate stringer -type=Operation -trimprefix=Diff
31 // DiffDelete item represents a delete diff.
32 DiffDelete Operation = -1
33 // DiffInsert item represents an insert diff.
34 DiffInsert Operation = 1
35 // DiffEqual item represents an equal diff.
36 DiffEqual Operation = 0
39 // Diff represents one diff operation
45 // splice removes amount elements from slice at index index, replacing them with elements.
46 func splice(slice []Diff, index int, amount int, elements ...Diff) []Diff {
47 if len(elements) == amount {
48 // Easy case: overwrite the relevant items.
49 copy(slice[index:], elements)
52 if len(elements) < amount {
53 // Fewer new items than old.
54 // Copy in the new items.
55 copy(slice[index:], elements)
56 // Shift the remaining items left.
57 copy(slice[index+len(elements):], slice[index+amount:])
58 // Calculate the new end of the slice.
59 end := len(slice) - amount + len(elements)
60 // Zero stranded elements at end so that they can be garbage collected.
67 // More new items than old.
68 // Make room in slice for new elements.
69 // There's probably an even more efficient way to do this,
70 // but this is simple and clear.
71 need := len(slice) - amount + len(elements)
72 for len(slice) < need {
73 slice = append(slice, Diff{})
75 // Shift slice elements right to make room for new elements.
76 copy(slice[index+len(elements):], slice[index+amount:])
77 // Copy in new elements.
78 copy(slice[index:], elements)
82 // DiffMain finds the differences between two texts.
83 // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
84 func (dmp *DiffMatchPatch) DiffMain(text1, text2 string, checklines bool) []Diff {
85 return dmp.DiffMainRunes([]rune(text1), []rune(text2), checklines)
88 // DiffMainRunes finds the differences between two rune sequences.
89 // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
90 func (dmp *DiffMatchPatch) DiffMainRunes(text1, text2 []rune, checklines bool) []Diff {
91 var deadline time.Time
92 if dmp.DiffTimeout > 0 {
93 deadline = time.Now().Add(dmp.DiffTimeout)
95 return dmp.diffMainRunes(text1, text2, checklines, deadline)
98 func (dmp *DiffMatchPatch) diffMainRunes(text1, text2 []rune, checklines bool, deadline time.Time) []Diff {
99 if runesEqual(text1, text2) {
102 diffs = append(diffs, Diff{DiffEqual, string(text1)})
106 // Trim off common prefix (speedup).
107 commonlength := commonPrefixLength(text1, text2)
108 commonprefix := text1[:commonlength]
109 text1 = text1[commonlength:]
110 text2 = text2[commonlength:]
112 // Trim off common suffix (speedup).
113 commonlength = commonSuffixLength(text1, text2)
114 commonsuffix := text1[len(text1)-commonlength:]
115 text1 = text1[:len(text1)-commonlength]
116 text2 = text2[:len(text2)-commonlength]
118 // Compute the diff on the middle block.
119 diffs := dmp.diffCompute(text1, text2, checklines, deadline)
121 // Restore the prefix and suffix.
122 if len(commonprefix) != 0 {
123 diffs = append([]Diff{Diff{DiffEqual, string(commonprefix)}}, diffs...)
125 if len(commonsuffix) != 0 {
126 diffs = append(diffs, Diff{DiffEqual, string(commonsuffix)})
129 return dmp.DiffCleanupMerge(diffs)
132 // diffCompute finds the differences between two rune slices. Assumes that the texts do not have any common prefix or suffix.
133 func (dmp *DiffMatchPatch) diffCompute(text1, text2 []rune, checklines bool, deadline time.Time) []Diff {
136 // Just add some text (speedup).
137 return append(diffs, Diff{DiffInsert, string(text2)})
138 } else if len(text2) == 0 {
139 // Just delete some text (speedup).
140 return append(diffs, Diff{DiffDelete, string(text1)})
143 var longtext, shorttext []rune
144 if len(text1) > len(text2) {
152 if i := runesIndex(longtext, shorttext); i != -1 {
154 // Swap insertions for deletions if diff is reversed.
155 if len(text1) > len(text2) {
158 // Shorter text is inside the longer text (speedup).
160 Diff{op, string(longtext[:i])},
161 Diff{DiffEqual, string(shorttext)},
162 Diff{op, string(longtext[i+len(shorttext):])},
164 } else if len(shorttext) == 1 {
165 // Single character string.
166 // After the previous speedup, the character can't be an equality.
168 Diff{DiffDelete, string(text1)},
169 Diff{DiffInsert, string(text2)},
171 // Check to see if the problem can be split in two.
172 } else if hm := dmp.diffHalfMatch(text1, text2); hm != nil {
173 // A half-match was found, sort out the return data.
179 // Send both pairs off for separate processing.
180 diffsA := dmp.diffMainRunes(text1A, text2A, checklines, deadline)
181 diffsB := dmp.diffMainRunes(text1B, text2B, checklines, deadline)
182 // Merge the results.
184 diffs = append(diffs, Diff{DiffEqual, string(midCommon)})
185 diffs = append(diffs, diffsB...)
187 } else if checklines && len(text1) > 100 && len(text2) > 100 {
188 return dmp.diffLineMode(text1, text2, deadline)
190 return dmp.diffBisect(text1, text2, deadline)
193 // diffLineMode does a quick line-level diff on both []runes, then rediff the parts for greater accuracy. This speedup can produce non-minimal diffs.
194 func (dmp *DiffMatchPatch) diffLineMode(text1, text2 []rune, deadline time.Time) []Diff {
195 // Scan the text on a line-by-line basis first.
196 text1, text2, linearray := dmp.diffLinesToRunes(text1, text2)
198 diffs := dmp.diffMainRunes(text1, text2, false, deadline)
200 // Convert the diff back to original text.
201 diffs = dmp.DiffCharsToLines(diffs, linearray)
202 // Eliminate freak matches (e.g. blank lines)
203 diffs = dmp.DiffCleanupSemantic(diffs)
205 // Rediff any replacement blocks, this time character-by-character.
206 // Add a dummy entry at the end.
207 diffs = append(diffs, Diff{DiffEqual, ""})
213 // NOTE: Rune slices are slower than using strings in this case.
217 for pointer < len(diffs) {
218 switch diffs[pointer].Type {
221 textInsert += diffs[pointer].Text
224 textDelete += diffs[pointer].Text
226 // Upon reaching an equality, check for prior redundancies.
227 if countDelete >= 1 && countInsert >= 1 {
228 // Delete the offending records and add the merged ones.
229 diffs = splice(diffs, pointer-countDelete-countInsert,
230 countDelete+countInsert)
232 pointer = pointer - countDelete - countInsert
233 a := dmp.diffMainRunes([]rune(textDelete), []rune(textInsert), false, deadline)
234 for j := len(a) - 1; j >= 0; j-- {
235 diffs = splice(diffs, pointer, 0, a[j])
237 pointer = pointer + len(a)
248 return diffs[:len(diffs)-1] // Remove the dummy entry at the end.
251 // DiffBisect finds the 'middle snake' of a diff, split the problem in two and return the recursively constructed diff.
252 // If an invalid UTF-8 sequence is encountered, it will be replaced by the Unicode replacement character.
253 // See Myers 1986 paper: An O(ND) Difference Algorithm and Its Variations.
254 func (dmp *DiffMatchPatch) DiffBisect(text1, text2 string, deadline time.Time) []Diff {
255 // Unused in this code, but retained for interface compatibility.
256 return dmp.diffBisect([]rune(text1), []rune(text2), deadline)
259 // diffBisect finds the 'middle snake' of a diff, splits the problem in two and returns the recursively constructed diff.
260 // See Myers's 1986 paper: An O(ND) Difference Algorithm and Its Variations.
261 func (dmp *DiffMatchPatch) diffBisect(runes1, runes2 []rune, deadline time.Time) []Diff {
262 // Cache the text lengths to prevent multiple calls.
263 runes1Len, runes2Len := len(runes1), len(runes2)
265 maxD := (runes1Len + runes2Len + 1) / 2
269 v1 := make([]int, vLength)
270 v2 := make([]int, vLength)
278 delta := runes1Len - runes2Len
279 // If the total number of characters is odd, then the front path will collide with the reverse path.
280 front := (delta%2 != 0)
281 // Offsets for start and end of k loop. Prevents mapping of space beyond the grid.
286 for d := 0; d < maxD; d++ {
287 // Bail out if deadline is reached.
288 if !deadline.IsZero() && d%16 == 0 && time.Now().After(deadline) {
292 // Walk the front path one step.
293 for k1 := -d + k1start; k1 <= d-k1end; k1 += 2 {
294 k1Offset := vOffset + k1
297 if k1 == -d || (k1 != d && v1[k1Offset-1] < v1[k1Offset+1]) {
300 x1 = v1[k1Offset-1] + 1
304 for x1 < runes1Len && y1 < runes2Len {
305 if runes1[x1] != runes2[y1] {
313 // Ran off the right of the graph.
315 } else if y1 > runes2Len {
316 // Ran off the bottom of the graph.
319 k2Offset := vOffset + delta - k1
320 if k2Offset >= 0 && k2Offset < vLength && v2[k2Offset] != -1 {
321 // Mirror x2 onto top-left coordinate system.
322 x2 := runes1Len - v2[k2Offset]
325 return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline)
330 // Walk the reverse path one step.
331 for k2 := -d + k2start; k2 <= d-k2end; k2 += 2 {
332 k2Offset := vOffset + k2
334 if k2 == -d || (k2 != d && v2[k2Offset-1] < v2[k2Offset+1]) {
337 x2 = v2[k2Offset-1] + 1
340 for x2 < runes1Len && y2 < runes2Len {
341 if runes1[runes1Len-x2-1] != runes2[runes2Len-y2-1] {
349 // Ran off the left of the graph.
351 } else if y2 > runes2Len {
352 // Ran off the top of the graph.
355 k1Offset := vOffset + delta - k2
356 if k1Offset >= 0 && k1Offset < vLength && v1[k1Offset] != -1 {
358 y1 := vOffset + x1 - k1Offset
359 // Mirror x2 onto top-left coordinate system.
363 return dmp.diffBisectSplit(runes1, runes2, x1, y1, deadline)
369 // Diff took too long and hit the deadline or number of diffs equals number of characters, no commonality at all.
371 Diff{DiffDelete, string(runes1)},
372 Diff{DiffInsert, string(runes2)},
376 func (dmp *DiffMatchPatch) diffBisectSplit(runes1, runes2 []rune, x, y int,
377 deadline time.Time) []Diff {
378 runes1a := runes1[:x]
379 runes2a := runes2[:y]
380 runes1b := runes1[x:]
381 runes2b := runes2[y:]
383 // Compute both diffs serially.
384 diffs := dmp.diffMainRunes(runes1a, runes2a, false, deadline)
385 diffsb := dmp.diffMainRunes(runes1b, runes2b, false, deadline)
387 return append(diffs, diffsb...)
390 // DiffLinesToChars splits two texts into a list of strings, and educes the texts to a string of hashes where each Unicode character represents one line.
391 // It's slightly faster to call DiffLinesToRunes first, followed by DiffMainRunes.
392 func (dmp *DiffMatchPatch) DiffLinesToChars(text1, text2 string) (string, string, []string) {
393 chars1, chars2, lineArray := dmp.DiffLinesToRunes(text1, text2)
394 return string(chars1), string(chars2), lineArray
397 // DiffLinesToRunes splits two texts into a list of runes. Each rune represents one line.
398 func (dmp *DiffMatchPatch) DiffLinesToRunes(text1, text2 string) ([]rune, []rune, []string) {
399 // '\x00' is a valid character, but various debuggers don't like it. So we'll insert a junk entry to avoid generating a null character.
400 lineArray := []string{""} // e.g. lineArray[4] == 'Hello\n'
401 lineHash := map[string]int{} // e.g. lineHash['Hello\n'] == 4
403 chars1 := dmp.diffLinesToRunesMunge(text1, &lineArray, lineHash)
404 chars2 := dmp.diffLinesToRunesMunge(text2, &lineArray, lineHash)
406 return chars1, chars2, lineArray
409 func (dmp *DiffMatchPatch) diffLinesToRunes(text1, text2 []rune) ([]rune, []rune, []string) {
410 return dmp.DiffLinesToRunes(string(text1), string(text2))
413 // diffLinesToRunesMunge splits a text into an array of strings, and reduces the texts to a []rune where each Unicode character represents one line.
414 // We use strings instead of []runes as input mainly because you can't use []rune as a map key.
415 func (dmp *DiffMatchPatch) diffLinesToRunesMunge(text string, lineArray *[]string, lineHash map[string]int) []rune {
416 // Walk the text, pulling out a substring for each line. text.split('\n') would would temporarily double our memory footprint. Modifying text would create many large strings to garbage collect.
421 for lineEnd < len(text)-1 {
422 lineEnd = indexOf(text, "\n", lineStart)
425 lineEnd = len(text) - 1
428 line := text[lineStart : lineEnd+1]
429 lineStart = lineEnd + 1
430 lineValue, ok := lineHash[line]
433 runes = append(runes, rune(lineValue))
435 *lineArray = append(*lineArray, line)
436 lineHash[line] = len(*lineArray) - 1
437 runes = append(runes, rune(len(*lineArray)-1))
444 // DiffCharsToLines rehydrates the text in a diff from a string of line hashes to real lines of text.
445 func (dmp *DiffMatchPatch) DiffCharsToLines(diffs []Diff, lineArray []string) []Diff {
446 hydrated := make([]Diff, 0, len(diffs))
447 for _, aDiff := range diffs {
449 text := make([]string, len(chars))
451 for i, r := range chars {
452 text[i] = lineArray[r]
455 aDiff.Text = strings.Join(text, "")
456 hydrated = append(hydrated, aDiff)
461 // DiffCommonPrefix determines the common prefix length of two strings.
462 func (dmp *DiffMatchPatch) DiffCommonPrefix(text1, text2 string) int {
463 // Unused in this code, but retained for interface compatibility.
464 return commonPrefixLength([]rune(text1), []rune(text2))
467 // DiffCommonSuffix determines the common suffix length of two strings.
468 func (dmp *DiffMatchPatch) DiffCommonSuffix(text1, text2 string) int {
469 // Unused in this code, but retained for interface compatibility.
470 return commonSuffixLength([]rune(text1), []rune(text2))
473 // commonPrefixLength returns the length of the common prefix of two rune slices.
474 func commonPrefixLength(text1, text2 []rune) int {
475 // Linear search. See comment in commonSuffixLength.
477 for ; n < len(text1) && n < len(text2); n++ {
478 if text1[n] != text2[n] {
485 // commonSuffixLength returns the length of the common suffix of two rune slices.
486 func commonSuffixLength(text1, text2 []rune) int {
487 // Use linear search rather than the binary search discussed at https://neil.fraser.name/news/2007/10/09/.
488 // See discussion at https://github.com/sergi/go-diff/issues/54.
494 if i1 < 0 || i2 < 0 || text1[i1] != text2[i2] {
500 // DiffCommonOverlap determines if the suffix of one string is the prefix of another.
501 func (dmp *DiffMatchPatch) DiffCommonOverlap(text1 string, text2 string) int {
502 // Cache the text lengths to prevent multiple calls.
503 text1Length := len(text1)
504 text2Length := len(text2)
505 // Eliminate the null case.
506 if text1Length == 0 || text2Length == 0 {
509 // Truncate the longer string.
510 if text1Length > text2Length {
511 text1 = text1[text1Length-text2Length:]
512 } else if text1Length < text2Length {
513 text2 = text2[0:text1Length]
515 textLength := int(math.Min(float64(text1Length), float64(text2Length)))
516 // Quick check for the worst case.
521 // Start by looking for a single character match and increase length until no match is found. Performance analysis: http://neil.fraser.name/news/2010/11/04/
525 pattern := text1[textLength-length:]
526 found := strings.Index(text2, pattern)
531 if found == 0 || text1[textLength-length:] == text2[0:length] {
540 // DiffHalfMatch checks whether the two texts share a substring which is at least half the length of the longer text. This speedup can produce non-minimal diffs.
541 func (dmp *DiffMatchPatch) DiffHalfMatch(text1, text2 string) []string {
542 // Unused in this code, but retained for interface compatibility.
543 runeSlices := dmp.diffHalfMatch([]rune(text1), []rune(text2))
544 if runeSlices == nil {
548 result := make([]string, len(runeSlices))
549 for i, r := range runeSlices {
550 result[i] = string(r)
555 func (dmp *DiffMatchPatch) diffHalfMatch(text1, text2 []rune) [][]rune {
556 if dmp.DiffTimeout <= 0 {
557 // Don't risk returning a non-optimal diff if we have unlimited time.
561 var longtext, shorttext []rune
562 if len(text1) > len(text2) {
570 if len(longtext) < 4 || len(shorttext)*2 < len(longtext) {
571 return nil // Pointless.
574 // First check if the second quarter is the seed for a half-match.
575 hm1 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+3)/4))
577 // Check again based on the third quarter.
578 hm2 := dmp.diffHalfMatchI(longtext, shorttext, int(float64(len(longtext)+1)/2))
581 if hm1 == nil && hm2 == nil {
583 } else if hm2 == nil {
585 } else if hm1 == nil {
588 // Both matched. Select the longest.
589 if len(hm1[4]) > len(hm2[4]) {
596 // A half-match was found, sort out the return data.
597 if len(text1) > len(text2) {
601 return [][]rune{hm[2], hm[3], hm[0], hm[1], hm[4]}
604 // diffHalfMatchI checks if a substring of shorttext exist within longtext such that the substring is at least half the length of longtext?
605 // Returns a slice containing the prefix of longtext, the suffix of longtext, the prefix of shorttext, the suffix of shorttext and the common middle, or null if there was no match.
606 func (dmp *DiffMatchPatch) diffHalfMatchI(l, s []rune, i int) [][]rune {
607 var bestCommonA []rune
608 var bestCommonB []rune
609 var bestCommonLen int
610 var bestLongtextA []rune
611 var bestLongtextB []rune
612 var bestShorttextA []rune
613 var bestShorttextB []rune
615 // Start with a 1/4 length substring at position i as a seed.
616 seed := l[i : i+len(l)/4]
618 for j := runesIndexOf(s, seed, 0); j != -1; j = runesIndexOf(s, seed, j+1) {
619 prefixLength := commonPrefixLength(l[i:], s[j:])
620 suffixLength := commonSuffixLength(l[:i], s[:j])
622 if bestCommonLen < suffixLength+prefixLength {
623 bestCommonA = s[j-suffixLength : j]
624 bestCommonB = s[j : j+prefixLength]
625 bestCommonLen = len(bestCommonA) + len(bestCommonB)
626 bestLongtextA = l[:i-suffixLength]
627 bestLongtextB = l[i+prefixLength:]
628 bestShorttextA = s[:j-suffixLength]
629 bestShorttextB = s[j+prefixLength:]
633 if bestCommonLen*2 < len(l) {
642 append(bestCommonA, bestCommonB...),
646 // DiffCleanupSemantic reduces the number of edits by eliminating semantically trivial equalities.
647 func (dmp *DiffMatchPatch) DiffCleanupSemantic(diffs []Diff) []Diff {
649 // Stack of indices where equalities are found.
650 equalities := make([]int, 0, len(diffs))
652 var lastequality string
653 // Always equal to diffs[equalities[equalitiesLength - 1]][1]
654 var pointer int // Index of current position.
655 // Number of characters that changed prior to the equality.
656 var lengthInsertions1, lengthDeletions1 int
657 // Number of characters that changed after the equality.
658 var lengthInsertions2, lengthDeletions2 int
660 for pointer < len(diffs) {
661 if diffs[pointer].Type == DiffEqual {
663 equalities = append(equalities, pointer)
664 lengthInsertions1 = lengthInsertions2
665 lengthDeletions1 = lengthDeletions2
666 lengthInsertions2 = 0
668 lastequality = diffs[pointer].Text
670 // An insertion or deletion.
672 if diffs[pointer].Type == DiffInsert {
673 lengthInsertions2 += len(diffs[pointer].Text)
675 lengthDeletions2 += len(diffs[pointer].Text)
677 // Eliminate an equality that is smaller or equal to the edits on both sides of it.
678 difference1 := int(math.Max(float64(lengthInsertions1), float64(lengthDeletions1)))
679 difference2 := int(math.Max(float64(lengthInsertions2), float64(lengthDeletions2)))
680 if len(lastequality) > 0 &&
681 (len(lastequality) <= difference1) &&
682 (len(lastequality) <= difference2) {
684 insPoint := equalities[len(equalities)-1]
685 diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
687 // Change second copy to insert.
688 diffs[insPoint+1].Type = DiffInsert
689 // Throw away the equality we just deleted.
690 equalities = equalities[:len(equalities)-1]
692 if len(equalities) > 0 {
693 equalities = equalities[:len(equalities)-1]
696 if len(equalities) > 0 {
697 pointer = equalities[len(equalities)-1]
700 lengthInsertions1 = 0 // Reset the counters.
702 lengthInsertions2 = 0
711 // Normalize the diff.
713 diffs = dmp.DiffCleanupMerge(diffs)
715 diffs = dmp.DiffCleanupSemanticLossless(diffs)
716 // Find any overlaps between deletions and insertions.
717 // e.g: <del>abcxxx</del><ins>xxxdef</ins>
718 // -> <del>abc</del>xxx<ins>def</ins>
719 // e.g: <del>xxxabc</del><ins>defxxx</ins>
720 // -> <ins>def</ins>xxx<del>abc</del>
721 // Only extract an overlap if it is as big as the edit ahead or behind it.
723 for pointer < len(diffs) {
724 if diffs[pointer-1].Type == DiffDelete &&
725 diffs[pointer].Type == DiffInsert {
726 deletion := diffs[pointer-1].Text
727 insertion := diffs[pointer].Text
728 overlapLength1 := dmp.DiffCommonOverlap(deletion, insertion)
729 overlapLength2 := dmp.DiffCommonOverlap(insertion, deletion)
730 if overlapLength1 >= overlapLength2 {
731 if float64(overlapLength1) >= float64(len(deletion))/2 ||
732 float64(overlapLength1) >= float64(len(insertion))/2 {
734 // Overlap found. Insert an equality and trim the surrounding edits.
735 diffs = splice(diffs, pointer, 0, Diff{DiffEqual, insertion[:overlapLength1]})
736 diffs[pointer-1].Text =
737 deletion[0 : len(deletion)-overlapLength1]
738 diffs[pointer+1].Text = insertion[overlapLength1:]
742 if float64(overlapLength2) >= float64(len(deletion))/2 ||
743 float64(overlapLength2) >= float64(len(insertion))/2 {
744 // Reverse overlap found. Insert an equality and swap and trim the surrounding edits.
745 overlap := Diff{DiffEqual, deletion[:overlapLength2]}
746 diffs = splice(diffs, pointer, 0, overlap)
747 diffs[pointer-1].Type = DiffInsert
748 diffs[pointer-1].Text = insertion[0 : len(insertion)-overlapLength2]
749 diffs[pointer+1].Type = DiffDelete
750 diffs[pointer+1].Text = deletion[overlapLength2:]
762 // Define some regex patterns for matching boundaries.
764 nonAlphaNumericRegex = regexp.MustCompile(`[^a-zA-Z0-9]`)
765 whitespaceRegex = regexp.MustCompile(`\s`)
766 linebreakRegex = regexp.MustCompile(`[\r\n]`)
767 blanklineEndRegex = regexp.MustCompile(`\n\r?\n$`)
768 blanklineStartRegex = regexp.MustCompile(`^\r?\n\r?\n`)
771 // diffCleanupSemanticScore computes a score representing whether the internal boundary falls on logical boundaries.
772 // Scores range from 6 (best) to 0 (worst). Closure, but does not reference any external variables.
773 func diffCleanupSemanticScore(one, two string) int {
774 if len(one) == 0 || len(two) == 0 {
775 // Edges are the best.
779 // Each port of this function behaves slightly differently due to subtle differences in each language's definition of things like 'whitespace'. Since this function's purpose is largely cosmetic, the choice has been made to use each language's native features rather than force total conformity.
780 rune1, _ := utf8.DecodeLastRuneInString(one)
781 rune2, _ := utf8.DecodeRuneInString(two)
782 char1 := string(rune1)
783 char2 := string(rune2)
785 nonAlphaNumeric1 := nonAlphaNumericRegex.MatchString(char1)
786 nonAlphaNumeric2 := nonAlphaNumericRegex.MatchString(char2)
787 whitespace1 := nonAlphaNumeric1 && whitespaceRegex.MatchString(char1)
788 whitespace2 := nonAlphaNumeric2 && whitespaceRegex.MatchString(char2)
789 lineBreak1 := whitespace1 && linebreakRegex.MatchString(char1)
790 lineBreak2 := whitespace2 && linebreakRegex.MatchString(char2)
791 blankLine1 := lineBreak1 && blanklineEndRegex.MatchString(one)
792 blankLine2 := lineBreak2 && blanklineEndRegex.MatchString(two)
794 if blankLine1 || blankLine2 {
795 // Five points for blank lines.
797 } else if lineBreak1 || lineBreak2 {
798 // Four points for line breaks.
800 } else if nonAlphaNumeric1 && !whitespace1 && whitespace2 {
801 // Three points for end of sentences.
803 } else if whitespace1 || whitespace2 {
804 // Two points for whitespace.
806 } else if nonAlphaNumeric1 || nonAlphaNumeric2 {
807 // One point for non-alphanumeric.
813 // DiffCleanupSemanticLossless looks for single edits surrounded on both sides by equalities which can be shifted sideways to align the edit to a word boundary.
814 // E.g: The c<ins>at c</ins>ame. -> The <ins>cat </ins>came.
815 func (dmp *DiffMatchPatch) DiffCleanupSemanticLossless(diffs []Diff) []Diff {
818 // Intentionally ignore the first and last element (don't need checking).
819 for pointer < len(diffs)-1 {
820 if diffs[pointer-1].Type == DiffEqual &&
821 diffs[pointer+1].Type == DiffEqual {
823 // This is a single edit surrounded by equalities.
824 equality1 := diffs[pointer-1].Text
825 edit := diffs[pointer].Text
826 equality2 := diffs[pointer+1].Text
828 // First, shift the edit as far left as possible.
829 commonOffset := dmp.DiffCommonSuffix(equality1, edit)
830 if commonOffset > 0 {
831 commonString := edit[len(edit)-commonOffset:]
832 equality1 = equality1[0 : len(equality1)-commonOffset]
833 edit = commonString + edit[:len(edit)-commonOffset]
834 equality2 = commonString + equality2
837 // Second, step character by character right, looking for the best fit.
838 bestEquality1 := equality1
840 bestEquality2 := equality2
841 bestScore := diffCleanupSemanticScore(equality1, edit) +
842 diffCleanupSemanticScore(edit, equality2)
844 for len(edit) != 0 && len(equality2) != 0 {
845 _, sz := utf8.DecodeRuneInString(edit)
846 if len(equality2) < sz || edit[:sz] != equality2[:sz] {
849 equality1 += edit[:sz]
850 edit = edit[sz:] + equality2[:sz]
851 equality2 = equality2[sz:]
852 score := diffCleanupSemanticScore(equality1, edit) +
853 diffCleanupSemanticScore(edit, equality2)
854 // The >= encourages trailing rather than leading whitespace on edits.
855 if score >= bestScore {
857 bestEquality1 = equality1
859 bestEquality2 = equality2
863 if diffs[pointer-1].Text != bestEquality1 {
864 // We have an improvement, save it back to the diff.
865 if len(bestEquality1) != 0 {
866 diffs[pointer-1].Text = bestEquality1
868 diffs = splice(diffs, pointer-1, 1)
872 diffs[pointer].Text = bestEdit
873 if len(bestEquality2) != 0 {
874 diffs[pointer+1].Text = bestEquality2
876 diffs = append(diffs[:pointer+1], diffs[pointer+2:]...)
887 // DiffCleanupEfficiency reduces the number of edits by eliminating operationally trivial equalities.
888 func (dmp *DiffMatchPatch) DiffCleanupEfficiency(diffs []Diff) []Diff {
890 // Stack of indices where equalities are found.
891 type equality struct {
895 var equalities *equality
896 // Always equal to equalities[equalitiesLength-1][1]
898 pointer := 0 // Index of current position.
899 // Is there an insertion operation before the last equality.
901 // Is there a deletion operation before the last equality.
903 // Is there an insertion operation after the last equality.
905 // Is there a deletion operation after the last equality.
907 for pointer < len(diffs) {
908 if diffs[pointer].Type == DiffEqual { // Equality found.
909 if len(diffs[pointer].Text) < dmp.DiffEditCost &&
910 (postIns || postDel) {
912 equalities = &equality{
918 lastequality = diffs[pointer].Text
920 // Not a candidate, and can never become one.
926 } else { // An insertion or deletion.
927 if diffs[pointer].Type == DiffDelete {
933 // Five types to be split:
934 // <ins>A</ins><del>B</del>XY<ins>C</ins><del>D</del>
935 // <ins>A</ins>X<ins>C</ins><del>D</del>
936 // <ins>A</ins><del>B</del>X<ins>C</ins>
937 // <ins>A</del>X<ins>C</ins><del>D</del>
938 // <ins>A</ins><del>B</del>X<del>C</del>
952 if len(lastequality) > 0 &&
953 ((preIns && preDel && postIns && postDel) ||
954 ((len(lastequality) < dmp.DiffEditCost/2) && sumPres == 3)) {
956 insPoint := equalities.data
959 diffs = splice(diffs, insPoint, 0, Diff{DiffDelete, lastequality})
961 // Change second copy to insert.
962 diffs[insPoint+1].Type = DiffInsert
963 // Throw away the equality we just deleted.
964 equalities = equalities.next
967 if preIns && preDel {
968 // No changes made which could affect previous entry, keep going.
973 if equalities != nil {
974 equalities = equalities.next
976 if equalities != nil {
977 pointer = equalities.data
991 diffs = dmp.DiffCleanupMerge(diffs)
997 // DiffCleanupMerge reorders and merges like edit sections. Merge equalities.
998 // Any edit section can move as long as it doesn't cross an equality.
999 func (dmp *DiffMatchPatch) DiffCleanupMerge(diffs []Diff) []Diff {
1000 // Add a dummy entry at the end.
1001 diffs = append(diffs, Diff{DiffEqual, ""})
1006 textDelete := []rune(nil)
1007 textInsert := []rune(nil)
1009 for pointer < len(diffs) {
1010 switch diffs[pointer].Type {
1013 textInsert = append(textInsert, []rune(diffs[pointer].Text)...)
1018 textDelete = append(textDelete, []rune(diffs[pointer].Text)...)
1022 // Upon reaching an equality, check for prior redundancies.
1023 if countDelete+countInsert > 1 {
1024 if countDelete != 0 && countInsert != 0 {
1025 // Factor out any common prefixies.
1026 commonlength = commonPrefixLength(textInsert, textDelete)
1027 if commonlength != 0 {
1028 x := pointer - countDelete - countInsert
1029 if x > 0 && diffs[x-1].Type == DiffEqual {
1030 diffs[x-1].Text += string(textInsert[:commonlength])
1032 diffs = append([]Diff{Diff{DiffEqual, string(textInsert[:commonlength])}}, diffs...)
1035 textInsert = textInsert[commonlength:]
1036 textDelete = textDelete[commonlength:]
1038 // Factor out any common suffixies.
1039 commonlength = commonSuffixLength(textInsert, textDelete)
1040 if commonlength != 0 {
1041 insertIndex := len(textInsert) - commonlength
1042 deleteIndex := len(textDelete) - commonlength
1043 diffs[pointer].Text = string(textInsert[insertIndex:]) + diffs[pointer].Text
1044 textInsert = textInsert[:insertIndex]
1045 textDelete = textDelete[:deleteIndex]
1048 // Delete the offending records and add the merged ones.
1049 if countDelete == 0 {
1050 diffs = splice(diffs, pointer-countInsert,
1051 countDelete+countInsert,
1052 Diff{DiffInsert, string(textInsert)})
1053 } else if countInsert == 0 {
1054 diffs = splice(diffs, pointer-countDelete,
1055 countDelete+countInsert,
1056 Diff{DiffDelete, string(textDelete)})
1058 diffs = splice(diffs, pointer-countDelete-countInsert,
1059 countDelete+countInsert,
1060 Diff{DiffDelete, string(textDelete)},
1061 Diff{DiffInsert, string(textInsert)})
1064 pointer = pointer - countDelete - countInsert + 1
1065 if countDelete != 0 {
1068 if countInsert != 0 {
1071 } else if pointer != 0 && diffs[pointer-1].Type == DiffEqual {
1072 // Merge this equality with the previous one.
1073 diffs[pointer-1].Text += diffs[pointer].Text
1074 diffs = append(diffs[:pointer], diffs[pointer+1:]...)
1086 if len(diffs[len(diffs)-1].Text) == 0 {
1087 diffs = diffs[0 : len(diffs)-1] // Remove the dummy entry at the end.
1090 // Second pass: look for single edits surrounded on both sides by equalities which can be shifted sideways to eliminate an equality. E.g: A<ins>BA</ins>C -> <ins>AB</ins>AC
1093 // Intentionally ignore the first and last element (don't need checking).
1094 for pointer < (len(diffs) - 1) {
1095 if diffs[pointer-1].Type == DiffEqual &&
1096 diffs[pointer+1].Type == DiffEqual {
1097 // This is a single edit surrounded by equalities.
1098 if strings.HasSuffix(diffs[pointer].Text, diffs[pointer-1].Text) {
1099 // Shift the edit over the previous equality.
1100 diffs[pointer].Text = diffs[pointer-1].Text +
1101 diffs[pointer].Text[:len(diffs[pointer].Text)-len(diffs[pointer-1].Text)]
1102 diffs[pointer+1].Text = diffs[pointer-1].Text + diffs[pointer+1].Text
1103 diffs = splice(diffs, pointer-1, 1)
1105 } else if strings.HasPrefix(diffs[pointer].Text, diffs[pointer+1].Text) {
1106 // Shift the edit over the next equality.
1107 diffs[pointer-1].Text += diffs[pointer+1].Text
1108 diffs[pointer].Text =
1109 diffs[pointer].Text[len(diffs[pointer+1].Text):] + diffs[pointer+1].Text
1110 diffs = splice(diffs, pointer+1, 1)
1117 // If shifts were made, the diff needs reordering and another shift sweep.
1119 diffs = dmp.DiffCleanupMerge(diffs)
1125 // DiffXIndex returns the equivalent location in s2.
1126 func (dmp *DiffMatchPatch) DiffXIndex(diffs []Diff, loc int) int {
1132 for i := 0; i < len(diffs); i++ {
1134 if aDiff.Type != DiffInsert {
1135 // Equality or deletion.
1136 chars1 += len(aDiff.Text)
1138 if aDiff.Type != DiffDelete {
1139 // Equality or insertion.
1140 chars2 += len(aDiff.Text)
1143 // Overshot the location.
1150 if lastDiff.Type == DiffDelete {
1151 // The location was deleted.
1154 // Add the remaining character length.
1155 return lastChars2 + (loc - lastChars1)
1158 // DiffPrettyHtml converts a []Diff into a pretty HTML report.
1159 // It is intended as an example from which to write one's own display functions.
1160 func (dmp *DiffMatchPatch) DiffPrettyHtml(diffs []Diff) string {
1161 var buff bytes.Buffer
1162 for _, diff := range diffs {
1163 text := strings.Replace(html.EscapeString(diff.Text), "\n", "¶<br>", -1)
1166 _, _ = buff.WriteString("<ins style=\"background:#e6ffe6;\">")
1167 _, _ = buff.WriteString(text)
1168 _, _ = buff.WriteString("</ins>")
1170 _, _ = buff.WriteString("<del style=\"background:#ffe6e6;\">")
1171 _, _ = buff.WriteString(text)
1172 _, _ = buff.WriteString("</del>")
1174 _, _ = buff.WriteString("<span>")
1175 _, _ = buff.WriteString(text)
1176 _, _ = buff.WriteString("</span>")
1179 return buff.String()
1182 // DiffPrettyText converts a []Diff into a colored text report.
1183 func (dmp *DiffMatchPatch) DiffPrettyText(diffs []Diff) string {
1184 var buff bytes.Buffer
1185 for _, diff := range diffs {
1190 _, _ = buff.WriteString("\x1b[32m")
1191 _, _ = buff.WriteString(text)
1192 _, _ = buff.WriteString("\x1b[0m")
1194 _, _ = buff.WriteString("\x1b[31m")
1195 _, _ = buff.WriteString(text)
1196 _, _ = buff.WriteString("\x1b[0m")
1198 _, _ = buff.WriteString(text)
1202 return buff.String()
1205 // DiffText1 computes and returns the source text (all equalities and deletions).
1206 func (dmp *DiffMatchPatch) DiffText1(diffs []Diff) string {
1207 //StringBuilder text = new StringBuilder()
1208 var text bytes.Buffer
1210 for _, aDiff := range diffs {
1211 if aDiff.Type != DiffInsert {
1212 _, _ = text.WriteString(aDiff.Text)
1215 return text.String()
1218 // DiffText2 computes and returns the destination text (all equalities and insertions).
1219 func (dmp *DiffMatchPatch) DiffText2(diffs []Diff) string {
1220 var text bytes.Buffer
1222 for _, aDiff := range diffs {
1223 if aDiff.Type != DiffDelete {
1224 _, _ = text.WriteString(aDiff.Text)
1227 return text.String()
1230 // DiffLevenshtein computes the Levenshtein distance that is the number of inserted, deleted or substituted characters.
1231 func (dmp *DiffMatchPatch) DiffLevenshtein(diffs []Diff) int {
1236 for _, aDiff := range diffs {
1239 insertions += utf8.RuneCountInString(aDiff.Text)
1241 deletions += utf8.RuneCountInString(aDiff.Text)
1243 // A deletion and an insertion is one substitution.
1244 levenshtein += max(insertions, deletions)
1250 levenshtein += max(insertions, deletions)
1254 // DiffToDelta crushes the diff into an encoded string which describes the operations required to transform text1 into text2.
1255 // E.g. =3\t-2\t+ing -> Keep 3 chars, delete 2 chars, insert 'ing'. Operations are tab-separated. Inserted text is escaped using %xx notation.
1256 func (dmp *DiffMatchPatch) DiffToDelta(diffs []Diff) string {
1257 var text bytes.Buffer
1258 for _, aDiff := range diffs {
1261 _, _ = text.WriteString("+")
1262 _, _ = text.WriteString(strings.Replace(url.QueryEscape(aDiff.Text), "+", " ", -1))
1263 _, _ = text.WriteString("\t")
1266 _, _ = text.WriteString("-")
1267 _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text)))
1268 _, _ = text.WriteString("\t")
1271 _, _ = text.WriteString("=")
1272 _, _ = text.WriteString(strconv.Itoa(utf8.RuneCountInString(aDiff.Text)))
1273 _, _ = text.WriteString("\t")
1277 delta := text.String()
1278 if len(delta) != 0 {
1279 // Strip off trailing tab character.
1280 delta = delta[0 : utf8.RuneCountInString(delta)-1]
1281 delta = unescaper.Replace(delta)
1286 // DiffFromDelta given the original text1, and an encoded string which describes the operations required to transform text1 into text2, comAdde the full diff.
1287 func (dmp *DiffMatchPatch) DiffFromDelta(text1 string, delta string) (diffs []Diff, err error) {
1289 runes := []rune(text1)
1291 for _, token := range strings.Split(delta, "\t") {
1292 if len(token) == 0 {
1293 // Blank tokens are ok (from a trailing \t).
1297 // Each token begins with a one character parameter which specifies the operation of this token (delete, insert, equality).
1300 switch op := token[0]; op {
1302 // Decode would Diff all "+" to " "
1303 param = strings.Replace(param, "+", "%2b", -1)
1304 param, err = url.QueryUnescape(param)
1308 if !utf8.ValidString(param) {
1309 return nil, fmt.Errorf("invalid UTF-8 token: %q", param)
1312 diffs = append(diffs, Diff{DiffInsert, param})
1314 n, err := strconv.ParseInt(param, 10, 0)
1318 return nil, errors.New("Negative number in DiffFromDelta: " + param)
1322 // Break out if we are out of bounds, go1.6 can't handle this very well
1326 // Remember that string slicing is by byte - we want by rune here.
1327 text := string(runes[i-int(n) : i])
1330 diffs = append(diffs, Diff{DiffEqual, text})
1332 diffs = append(diffs, Diff{DiffDelete, text})
1335 // Anything else is an error.
1336 return nil, errors.New("Invalid diff operation in DiffFromDelta: " + string(token[0]))
1340 if i != len(runes) {
1341 return nil, fmt.Errorf("Delta length (%v) is different from source text length (%v)", i, len(text1))