1 // Copyright (c) 2012-2016 The go-diff authors. All rights reserved.
2 // https://github.com/sergi/go-diff
3 // See the included LICENSE file for license details.
5 // go-diff is a Go implementation of Google's Diff, Match, and Patch library
6 // Original library is Copyright (c) 2006 Google Inc.
7 // http://code.google.com/p/google-diff-match-patch/
15 // MatchMain locates the best instance of 'pattern' in 'text' near 'loc'.
16 // Returns -1 if no match found.
17 func (dmp *DiffMatchPatch) MatchMain(text, pattern string, loc int) int {
18 // Check for null inputs not needed since null can't be passed in C#.
20 loc = int(math.Max(0, math.Min(float64(loc), float64(len(text)))))
22 // Shortcut (potentially not guaranteed by the algorithm)
24 } else if len(text) == 0 {
27 } else if loc+len(pattern) <= len(text) && text[loc:loc+len(pattern)] == pattern {
28 // Perfect match at the perfect spot! (Includes case of null pattern)
31 // Do a fuzzy compare.
32 return dmp.MatchBitap(text, pattern, loc)
35 // MatchBitap locates the best instance of 'pattern' in 'text' near 'loc' using the Bitap algorithm.
36 // Returns -1 if no match was found.
37 func (dmp *DiffMatchPatch) MatchBitap(text, pattern string, loc int) int {
38 // Initialise the alphabet.
39 s := dmp.MatchAlphabet(pattern)
41 // Highest score beyond which we give up.
42 scoreThreshold := dmp.MatchThreshold
43 // Is there a nearby exact match? (speedup)
44 bestLoc := indexOf(text, pattern, loc)
46 scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
47 pattern), scoreThreshold)
48 // What about in the other direction? (speedup)
49 bestLoc = lastIndexOf(text, pattern, loc+len(pattern))
51 scoreThreshold = math.Min(dmp.matchBitapScore(0, bestLoc, loc,
52 pattern), scoreThreshold)
56 // Initialise the bit arrays.
57 matchmask := 1 << uint((len(pattern) - 1))
60 var binMin, binMid int
61 binMax := len(pattern) + len(text)
63 for d := 0; d < len(pattern); d++ {
64 // Scan for the best match; each iteration allows for one more error. Run a binary search to determine how far from 'loc' we can stray at this error level.
68 if dmp.matchBitapScore(d, loc+binMid, loc, pattern) <= scoreThreshold {
73 binMid = (binMax-binMin)/2 + binMin
75 // Use the result from this iteration as the maximum for the next.
77 start := int(math.Max(1, float64(loc-binMid+1)))
78 finish := int(math.Min(float64(loc+binMid), float64(len(text))) + float64(len(pattern)))
80 rd := make([]int, finish+2)
81 rd[finish+1] = (1 << uint(d)) - 1
83 for j := finish; j >= start; j-- {
88 } else if _, ok := s[text[j-1]]; !ok {
91 charMatch = s[text[j-1]]
95 // First pass: exact match.
96 rd[j] = ((rd[j+1] << 1) | 1) & charMatch
98 // Subsequent passes: fuzzy match.
99 rd[j] = ((rd[j+1]<<1)|1)&charMatch | (((lastRd[j+1] | lastRd[j]) << 1) | 1) | lastRd[j+1]
101 if (rd[j] & matchmask) != 0 {
102 score := dmp.matchBitapScore(d, j-1, loc, pattern)
103 // This match will almost certainly be better than any existing match. But check anyway.
104 if score <= scoreThreshold {
106 scoreThreshold = score
109 // When passing loc, don't exceed our current distance from loc.
110 start = int(math.Max(1, float64(2*loc-bestLoc)))
112 // Already passed loc, downhill from here on in.
118 if dmp.matchBitapScore(d+1, loc, loc, pattern) > scoreThreshold {
119 // No hope for a (better) match at greater error levels.
127 // matchBitapScore computes and returns the score for a match with e errors and x location.
128 func (dmp *DiffMatchPatch) matchBitapScore(e, x, loc int, pattern string) float64 {
129 accuracy := float64(e) / float64(len(pattern))
130 proximity := math.Abs(float64(loc - x))
131 if dmp.MatchDistance == 0 {
132 // Dodge divide by zero error.
139 return accuracy + (proximity / float64(dmp.MatchDistance))
142 // MatchAlphabet initialises the alphabet for the Bitap algorithm.
143 func (dmp *DiffMatchPatch) MatchAlphabet(pattern string) map[byte]int {
145 charPattern := []byte(pattern)
146 for _, c := range charPattern {
154 for _, c := range charPattern {
155 value := s[c] | int(uint(1)<<uint((len(pattern)-i-1)))