1// Copyright 2013 Google Inc. All rights reserved.
2//
3// Licensed under the Apache License, Version 2.0 (the "License");
4// you may not use this file except in compliance with the License.
5// You may obtain a copy of the License at
6//
7// http://www.apache.org/licenses/LICENSE-2.0
8//
9// Unless required by applicable law or agreed to in writing, software
10// distributed under the License is distributed on an "AS IS" BASIS,
11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
12// See the License for the specific language governing permissions and
13// limitations under the License.
14
15// Package diff implements a linewise diff algorithm.
16package diff
17
18import (
19 "bytes"
20 "fmt"
21 "strings"
22)
23
24// Chunk represents a piece of the diff. A chunk will not have both added and
25// deleted lines. Equal lines are always after any added or deleted lines.
26// A Chunk may or may not have any lines in it, especially for the first or last
27// chunk in a computation.
28type Chunk struct {
29 Added []string
30 Deleted []string
31 Equal []string
32}
33
34func (c *Chunk) empty() bool {
35 return len(c.Added) == 0 && len(c.Deleted) == 0 && len(c.Equal) == 0
36}
37
38// Diff returns a string containing a line-by-line unified diff of the linewise
39// changes required to make A into B. Each line is prefixed with '+', '-', or
40// ' ' to indicate if it should be added, removed, or is correct respectively.
41func Diff(A, B string) string {
42 aLines := strings.Split(A, "\n")
43 bLines := strings.Split(B, "\n")
44
45 chunks := DiffChunks(aLines, bLines)
46
47 buf := new(bytes.Buffer)
48 for _, c := range chunks {
49 for _, line := range c.Added {
50 fmt.Fprintf(buf, "+%s\n", line)
51 }
52 for _, line := range c.Deleted {
53 fmt.Fprintf(buf, "-%s\n", line)
54 }
55 for _, line := range c.Equal {
56 fmt.Fprintf(buf, " %s\n", line)
57 }
58 }
59 return strings.TrimRight(buf.String(), "\n")
60}
61
62// DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm
63// to compute the edits required from A to B and returns the
64// edit chunks.
65func DiffChunks(a, b []string) []Chunk {
66 // algorithm: http://www.xmailserver.org/diff2.pdf
67
68 // We'll need these quantities a lot.
69 alen, blen := len(a), len(b) // M, N
70
71 // At most, it will require len(a) deletions and len(b) additions
72 // to transform a into b.
73 maxPath := alen + blen // MAX
74 if maxPath == 0 {
75 // degenerate case: two empty lists are the same
76 return nil
77 }
78
79 // Store the endpoint of the path for diagonals.
80 // We store only the a index, because the b index on any diagonal
81 // (which we know during the loop below) is aidx-diag.
82 // endpoint[maxPath] represents the 0 diagonal.
83 //
84 // Stated differently:
85 // endpoint[d] contains the aidx of a furthest reaching path in diagonal d
86 endpoint := make([]int, 2*maxPath+1) // V
87
88 saved := make([][]int, 0, 8) // Vs
89 save := func() {
90 dup := make([]int, len(endpoint))
91 copy(dup, endpoint)
92 saved = append(saved, dup)
93 }
94
95 var editDistance int // D
96dLoop:
97 for editDistance = 0; editDistance <= maxPath; editDistance++ {
98 // The 0 diag(onal) represents equality of a and b. Each diagonal to
99 // the left is numbered one lower, to the right is one higher, from
100 // -alen to +blen. Negative diagonals favor differences from a,
101 // positive diagonals favor differences from b. The edit distance to a
102 // diagonal d cannot be shorter than d itself.
103 //
104 // The iterations of this loop cover either odds or evens, but not both,
105 // If odd indices are inputs, even indices are outputs and vice versa.
106 for diag := -editDistance; diag <= editDistance; diag += 2 { // k
107 var aidx int // x
108 switch {
109 case diag == -editDistance:
110 // This is a new diagonal; copy from previous iter
111 aidx = endpoint[maxPath-editDistance+1] + 0
112 case diag == editDistance:
113 // This is a new diagonal; copy from previous iter
114 aidx = endpoint[maxPath+editDistance-1] + 1
115 case endpoint[maxPath+diag+1] > endpoint[maxPath+diag-1]:
116 // diagonal d+1 was farther along, so use that
117 aidx = endpoint[maxPath+diag+1] + 0
118 default:
119 // diagonal d-1 was farther (or the same), so use that
120 aidx = endpoint[maxPath+diag-1] + 1
121 }
122 // On diagonal d, we can compute bidx from aidx.
123 bidx := aidx - diag // y
124 // See how far we can go on this diagonal before we find a difference.
125 for aidx < alen && bidx < blen && a[aidx] == b[bidx] {
126 aidx++
127 bidx++
128 }
129 // Store the end of the current edit chain.
130 endpoint[maxPath+diag] = aidx
131 // If we've found the end of both inputs, we're done!
132 if aidx >= alen && bidx >= blen {
133 save() // save the final path
134 break dLoop
135 }
136 }
137 save() // save the current path
138 }
139 if editDistance == 0 {
140 return nil
141 }
142 chunks := make([]Chunk, editDistance+1)
143
144 x, y := alen, blen
145 for d := editDistance; d > 0; d-- {
146 endpoint := saved[d]
147 diag := x - y
148 insert := diag == -d || (diag != d && endpoint[maxPath+diag-1] < endpoint[maxPath+diag+1])
149
150 x1 := endpoint[maxPath+diag]
151 var x0, xM, kk int
152 if insert {
153 kk = diag + 1
154 x0 = endpoint[maxPath+kk]
155 xM = x0
156 } else {
157 kk = diag - 1
158 x0 = endpoint[maxPath+kk]
159 xM = x0 + 1
160 }
161 y0 := x0 - kk
162
163 var c Chunk
164 if insert {
165 c.Added = b[y0:][:1]
166 } else {
167 c.Deleted = a[x0:][:1]
168 }
169 if xM < x1 {
170 c.Equal = a[xM:][:x1-xM]
171 }
172
173 x, y = x0, y0
174 chunks[d] = c
175 }
176 if x > 0 {
177 chunks[0].Equal = a[:x]
178 }
179 if chunks[0].empty() {
180 chunks = chunks[1:]
181 }
182 if len(chunks) == 0 {
183 return nil
184 }
185 return chunks
186}