diff.go

  1// Copyright 2019 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package myers implements the Myers diff algorithm.
  6package myers
  7
  8import (
  9	"strings"
 10
 11	diff "github.com/aymanbagabas/go-udiff"
 12)
 13
 14// Sources:
 15// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
 16// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
 17
 18// ComputeEdits returns the diffs of two strings using a simple
 19// line-based implementation, like [diff.Strings].
 20//
 21// Deprecated: this implementation is moribund. However, when diffs
 22// appear in marker test expectations, they are the particular diffs
 23// produced by this implementation. The marker test framework
 24// asserts diff(orig, got)==wantDiff, but ideally it would compute
 25// got==apply(orig, wantDiff) so that the notation of the diff
 26// is immaterial.
 27func ComputeEdits(before, after string) []diff.Edit {
 28	beforeLines := splitLines(before)
 29	ops := operations(beforeLines, splitLines(after))
 30
 31	// Build a table mapping line number to offset.
 32	lineOffsets := make([]int, 0, len(beforeLines)+1)
 33	total := 0
 34	for i := range beforeLines {
 35		lineOffsets = append(lineOffsets, total)
 36		total += len(beforeLines[i])
 37	}
 38	lineOffsets = append(lineOffsets, total) // EOF
 39
 40	edits := make([]diff.Edit, 0, len(ops))
 41	for _, op := range ops {
 42		start, end := lineOffsets[op.I1], lineOffsets[op.I2]
 43		switch op.Kind {
 44		case opDelete:
 45			// Delete: before[I1:I2] is deleted.
 46			edits = append(edits, diff.Edit{Start: start, End: end})
 47		case opInsert:
 48			// Insert: after[J1:J2] is inserted at before[I1:I1].
 49			if content := strings.Join(op.Content, ""); content != "" {
 50				edits = append(edits, diff.Edit{Start: start, End: end, New: content})
 51			}
 52		}
 53	}
 54	return edits
 55}
 56
 57// opKind is used to denote the type of operation a line represents.
 58type opKind int
 59
 60const (
 61	opDelete opKind = iota // line deleted from input (-)
 62	opInsert               // line inserted into output (+)
 63	opEqual                // line present in input and output
 64)
 65
 66func (kind opKind) String() string {
 67	switch kind {
 68	case opDelete:
 69		return "delete"
 70	case opInsert:
 71		return "insert"
 72	case opEqual:
 73		return "equal"
 74	default:
 75		panic("unknown opKind")
 76	}
 77}
 78
 79type operation struct {
 80	Kind    opKind
 81	Content []string // content from b
 82	I1, I2  int      // indices of the line in a
 83	J1      int      // indices of the line in b, J2 implied by len(Content)
 84}
 85
 86// operations returns the list of operations to convert a into b, consolidating
 87// operations for multiple lines and not including equal lines.
 88func operations(a, b []string) []*operation {
 89	if len(a) == 0 && len(b) == 0 {
 90		return nil
 91	}
 92
 93	trace, offset := shortestEditSequence(a, b)
 94	snakes := backtrack(trace, len(a), len(b), offset)
 95
 96	M, N := len(a), len(b)
 97
 98	var i int
 99	solution := make([]*operation, len(a)+len(b))
100
101	add := func(op *operation, i2, j2 int) {
102		if op == nil {
103			return
104		}
105		op.I2 = i2
106		if op.Kind == opInsert {
107			op.Content = b[op.J1:j2]
108		}
109		solution[i] = op
110		i++
111	}
112	x, y := 0, 0
113	for _, snake := range snakes {
114		if len(snake) < 2 {
115			continue
116		}
117		var op *operation
118		// delete (horizontal)
119		for snake[0]-snake[1] > x-y {
120			if op == nil {
121				op = &operation{
122					Kind: opDelete,
123					I1:   x,
124					J1:   y,
125				}
126			}
127			x++
128			if x == M {
129				break
130			}
131		}
132		add(op, x, y)
133		op = nil
134		// insert (vertical)
135		for snake[0]-snake[1] < x-y {
136			if op == nil {
137				op = &operation{
138					Kind: opInsert,
139					I1:   x,
140					J1:   y,
141				}
142			}
143			y++
144		}
145		add(op, x, y)
146		op = nil
147		// equal (diagonal)
148		for x < snake[0] {
149			x++
150			y++
151		}
152		if x >= M && y >= N {
153			break
154		}
155	}
156	return solution[:i]
157}
158
159// backtrack uses the trace for the edit sequence computation and returns the
160// "snakes" that make up the solution. A "snake" is a single deletion or
161// insertion followed by zero or diagonals.
162func backtrack(trace [][]int, x, y, offset int) [][]int {
163	snakes := make([][]int, len(trace))
164	d := len(trace) - 1
165	for ; x > 0 && y > 0 && d > 0; d-- {
166		V := trace[d]
167		if len(V) == 0 {
168			continue
169		}
170		snakes[d] = []int{x, y}
171
172		k := x - y
173
174		var kPrev int
175		if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
176			kPrev = k + 1
177		} else {
178			kPrev = k - 1
179		}
180
181		x = V[kPrev+offset]
182		y = x - kPrev
183	}
184	if x < 0 || y < 0 {
185		return snakes
186	}
187	snakes[d] = []int{x, y}
188	return snakes
189}
190
191// shortestEditSequence returns the shortest edit sequence that converts a into b.
192func shortestEditSequence(a, b []string) ([][]int, int) {
193	M, N := len(a), len(b)
194	V := make([]int, 2*(N+M)+1)
195	offset := N + M
196	trace := make([][]int, N+M+1)
197
198	// Iterate through the maximum possible length of the SES (N+M).
199	for d := 0; d <= N+M; d++ {
200		copyV := make([]int, len(V))
201		// k lines are represented by the equation y = x - k. We move in
202		// increments of 2 because end points for even d are on even k lines.
203		for k := -d; k <= d; k += 2 {
204			// At each point, we either go down or to the right. We go down if
205			// k == -d, and we go to the right if k == d. We also prioritize
206			// the maximum x value, because we prefer deletions to insertions.
207			var x int
208			if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
209				x = V[k+1+offset] // down
210			} else {
211				x = V[k-1+offset] + 1 // right
212			}
213
214			y := x - k
215
216			// Diagonal moves while we have equal contents.
217			for x < M && y < N && a[x] == b[y] {
218				x++
219				y++
220			}
221
222			V[k+offset] = x
223
224			// Return if we've exceeded the maximum values.
225			if x == M && y == N {
226				// Makes sure to save the state of the array before returning.
227				copy(copyV, V)
228				trace[d] = copyV
229				return trace, offset
230			}
231		}
232
233		// Save the state of the array.
234		copy(copyV, V)
235		trace[d] = copyV
236	}
237	return nil, 0
238}
239
240func splitLines(text string) []string {
241	lines := strings.SplitAfter(text, "\n")
242	if lines[len(lines)-1] == "" {
243		lines = lines[:len(lines)-1]
244	}
245	return lines
246}