1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package myers implements the Myers diff algorithm.
6package myers
7
8import (
9 "strings"
10
11 diff "github.com/aymanbagabas/go-udiff"
12)
13
14// Sources:
15// https://blog.jcoglan.com/2017/02/17/the-myers-diff-algorithm-part-3/
16// https://www.codeproject.com/Articles/42279/%2FArticles%2F42279%2FInvestigating-Myers-diff-algorithm-Part-1-of-2
17
18// ComputeEdits returns the diffs of two strings using a simple
19// line-based implementation, like [diff.Strings].
20//
21// Deprecated: this implementation is moribund. However, when diffs
22// appear in marker test expectations, they are the particular diffs
23// produced by this implementation. The marker test framework
24// asserts diff(orig, got)==wantDiff, but ideally it would compute
25// got==apply(orig, wantDiff) so that the notation of the diff
26// is immaterial.
27func ComputeEdits(before, after string) []diff.Edit {
28 beforeLines := splitLines(before)
29 ops := operations(beforeLines, splitLines(after))
30
31 // Build a table mapping line number to offset.
32 lineOffsets := make([]int, 0, len(beforeLines)+1)
33 total := 0
34 for i := range beforeLines {
35 lineOffsets = append(lineOffsets, total)
36 total += len(beforeLines[i])
37 }
38 lineOffsets = append(lineOffsets, total) // EOF
39
40 edits := make([]diff.Edit, 0, len(ops))
41 for _, op := range ops {
42 start, end := lineOffsets[op.I1], lineOffsets[op.I2]
43 switch op.Kind {
44 case opDelete:
45 // Delete: before[I1:I2] is deleted.
46 edits = append(edits, diff.Edit{Start: start, End: end})
47 case opInsert:
48 // Insert: after[J1:J2] is inserted at before[I1:I1].
49 if content := strings.Join(op.Content, ""); content != "" {
50 edits = append(edits, diff.Edit{Start: start, End: end, New: content})
51 }
52 }
53 }
54 return edits
55}
56
57// opKind is used to denote the type of operation a line represents.
58type opKind int
59
60const (
61 opDelete opKind = iota // line deleted from input (-)
62 opInsert // line inserted into output (+)
63 opEqual // line present in input and output
64)
65
66func (kind opKind) String() string {
67 switch kind {
68 case opDelete:
69 return "delete"
70 case opInsert:
71 return "insert"
72 case opEqual:
73 return "equal"
74 default:
75 panic("unknown opKind")
76 }
77}
78
79type operation struct {
80 Kind opKind
81 Content []string // content from b
82 I1, I2 int // indices of the line in a
83 J1 int // indices of the line in b, J2 implied by len(Content)
84}
85
86// operations returns the list of operations to convert a into b, consolidating
87// operations for multiple lines and not including equal lines.
88func operations(a, b []string) []*operation {
89 if len(a) == 0 && len(b) == 0 {
90 return nil
91 }
92
93 trace, offset := shortestEditSequence(a, b)
94 snakes := backtrack(trace, len(a), len(b), offset)
95
96 M, N := len(a), len(b)
97
98 var i int
99 solution := make([]*operation, len(a)+len(b))
100
101 add := func(op *operation, i2, j2 int) {
102 if op == nil {
103 return
104 }
105 op.I2 = i2
106 if op.Kind == opInsert {
107 op.Content = b[op.J1:j2]
108 }
109 solution[i] = op
110 i++
111 }
112 x, y := 0, 0
113 for _, snake := range snakes {
114 if len(snake) < 2 {
115 continue
116 }
117 var op *operation
118 // delete (horizontal)
119 for snake[0]-snake[1] > x-y {
120 if op == nil {
121 op = &operation{
122 Kind: opDelete,
123 I1: x,
124 J1: y,
125 }
126 }
127 x++
128 if x == M {
129 break
130 }
131 }
132 add(op, x, y)
133 op = nil
134 // insert (vertical)
135 for snake[0]-snake[1] < x-y {
136 if op == nil {
137 op = &operation{
138 Kind: opInsert,
139 I1: x,
140 J1: y,
141 }
142 }
143 y++
144 }
145 add(op, x, y)
146 op = nil
147 // equal (diagonal)
148 for x < snake[0] {
149 x++
150 y++
151 }
152 if x >= M && y >= N {
153 break
154 }
155 }
156 return solution[:i]
157}
158
159// backtrack uses the trace for the edit sequence computation and returns the
160// "snakes" that make up the solution. A "snake" is a single deletion or
161// insertion followed by zero or diagonals.
162func backtrack(trace [][]int, x, y, offset int) [][]int {
163 snakes := make([][]int, len(trace))
164 d := len(trace) - 1
165 for ; x > 0 && y > 0 && d > 0; d-- {
166 V := trace[d]
167 if len(V) == 0 {
168 continue
169 }
170 snakes[d] = []int{x, y}
171
172 k := x - y
173
174 var kPrev int
175 if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
176 kPrev = k + 1
177 } else {
178 kPrev = k - 1
179 }
180
181 x = V[kPrev+offset]
182 y = x - kPrev
183 }
184 if x < 0 || y < 0 {
185 return snakes
186 }
187 snakes[d] = []int{x, y}
188 return snakes
189}
190
191// shortestEditSequence returns the shortest edit sequence that converts a into b.
192func shortestEditSequence(a, b []string) ([][]int, int) {
193 M, N := len(a), len(b)
194 V := make([]int, 2*(N+M)+1)
195 offset := N + M
196 trace := make([][]int, N+M+1)
197
198 // Iterate through the maximum possible length of the SES (N+M).
199 for d := 0; d <= N+M; d++ {
200 copyV := make([]int, len(V))
201 // k lines are represented by the equation y = x - k. We move in
202 // increments of 2 because end points for even d are on even k lines.
203 for k := -d; k <= d; k += 2 {
204 // At each point, we either go down or to the right. We go down if
205 // k == -d, and we go to the right if k == d. We also prioritize
206 // the maximum x value, because we prefer deletions to insertions.
207 var x int
208 if k == -d || (k != d && V[k-1+offset] < V[k+1+offset]) {
209 x = V[k+1+offset] // down
210 } else {
211 x = V[k-1+offset] + 1 // right
212 }
213
214 y := x - k
215
216 // Diagonal moves while we have equal contents.
217 for x < M && y < N && a[x] == b[y] {
218 x++
219 y++
220 }
221
222 V[k+offset] = x
223
224 // Return if we've exceeded the maximum values.
225 if x == M && y == N {
226 // Makes sure to save the state of the array before returning.
227 copy(copyV, V)
228 trace[d] = copyV
229 return trace, offset
230 }
231 }
232
233 // Save the state of the array.
234 copy(copyV, V)
235 trace[d] = copyV
236 }
237 return nil, 0
238}
239
240func splitLines(text string) []string {
241 lines := strings.SplitAfter(text, "\n")
242 if lines[len(lines)-1] == "" {
243 lines = lines[:len(lines)-1]
244 }
245 return lines
246}