diff.go

  1// Copyright 2013 Google Inc.  All rights reserved.
  2//
  3// Licensed under the Apache License, Version 2.0 (the "License");
  4// you may not use this file except in compliance with the License.
  5// You may obtain a copy of the License at
  6//
  7//     http://www.apache.org/licenses/LICENSE-2.0
  8//
  9// Unless required by applicable law or agreed to in writing, software
 10// distributed under the License is distributed on an "AS IS" BASIS,
 11// WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
 12// See the License for the specific language governing permissions and
 13// limitations under the License.
 14
 15// Package diff implements a linewise diff algorithm.
 16package diff
 17
 18import (
 19	"bytes"
 20	"fmt"
 21	"strings"
 22)
 23
 24// Chunk represents a piece of the diff.  A chunk will not have both added and
 25// deleted lines.  Equal lines are always after any added or deleted lines.
 26// A Chunk may or may not have any lines in it, especially for the first or last
 27// chunk in a computation.
 28type Chunk struct {
 29	Added   []string
 30	Deleted []string
 31	Equal   []string
 32}
 33
 34func (c *Chunk) empty() bool {
 35	return len(c.Added) == 0 && len(c.Deleted) == 0 && len(c.Equal) == 0
 36}
 37
 38// Diff returns a string containing a line-by-line unified diff of the linewise
 39// changes required to make A into B.  Each line is prefixed with '+', '-', or
 40// ' ' to indicate if it should be added, removed, or is correct respectively.
 41func Diff(A, B string) string {
 42	aLines := strings.Split(A, "\n")
 43	bLines := strings.Split(B, "\n")
 44
 45	chunks := DiffChunks(aLines, bLines)
 46
 47	buf := new(bytes.Buffer)
 48	for _, c := range chunks {
 49		for _, line := range c.Added {
 50			fmt.Fprintf(buf, "+%s\n", line)
 51		}
 52		for _, line := range c.Deleted {
 53			fmt.Fprintf(buf, "-%s\n", line)
 54		}
 55		for _, line := range c.Equal {
 56			fmt.Fprintf(buf, " %s\n", line)
 57		}
 58	}
 59	return strings.TrimRight(buf.String(), "\n")
 60}
 61
 62// DiffChunks uses an O(D(N+M)) shortest-edit-script algorithm
 63// to compute the edits required from A to B and returns the
 64// edit chunks.
 65func DiffChunks(a, b []string) []Chunk {
 66	// algorithm: http://www.xmailserver.org/diff2.pdf
 67
 68	// We'll need these quantities a lot.
 69	alen, blen := len(a), len(b) // M, N
 70
 71	// At most, it will require len(a) deletions and len(b) additions
 72	// to transform a into b.
 73	maxPath := alen + blen // MAX
 74	if maxPath == 0 {
 75		// degenerate case: two empty lists are the same
 76		return nil
 77	}
 78
 79	// Store the endpoint of the path for diagonals.
 80	// We store only the a index, because the b index on any diagonal
 81	// (which we know during the loop below) is aidx-diag.
 82	// endpoint[maxPath] represents the 0 diagonal.
 83	//
 84	// Stated differently:
 85	// endpoint[d] contains the aidx of a furthest reaching path in diagonal d
 86	endpoint := make([]int, 2*maxPath+1) // V
 87
 88	saved := make([][]int, 0, 8) // Vs
 89	save := func() {
 90		dup := make([]int, len(endpoint))
 91		copy(dup, endpoint)
 92		saved = append(saved, dup)
 93	}
 94
 95	var editDistance int // D
 96dLoop:
 97	for editDistance = 0; editDistance <= maxPath; editDistance++ {
 98		// The 0 diag(onal) represents equality of a and b.  Each diagonal to
 99		// the left is numbered one lower, to the right is one higher, from
100		// -alen to +blen.  Negative diagonals favor differences from a,
101		// positive diagonals favor differences from b.  The edit distance to a
102		// diagonal d cannot be shorter than d itself.
103		//
104		// The iterations of this loop cover either odds or evens, but not both,
105		// If odd indices are inputs, even indices are outputs and vice versa.
106		for diag := -editDistance; diag <= editDistance; diag += 2 { // k
107			var aidx int // x
108			switch {
109			case diag == -editDistance:
110				// This is a new diagonal; copy from previous iter
111				aidx = endpoint[maxPath-editDistance+1] + 0
112			case diag == editDistance:
113				// This is a new diagonal; copy from previous iter
114				aidx = endpoint[maxPath+editDistance-1] + 1
115			case endpoint[maxPath+diag+1] > endpoint[maxPath+diag-1]:
116				// diagonal d+1 was farther along, so use that
117				aidx = endpoint[maxPath+diag+1] + 0
118			default:
119				// diagonal d-1 was farther (or the same), so use that
120				aidx = endpoint[maxPath+diag-1] + 1
121			}
122			// On diagonal d, we can compute bidx from aidx.
123			bidx := aidx - diag // y
124			// See how far we can go on this diagonal before we find a difference.
125			for aidx < alen && bidx < blen && a[aidx] == b[bidx] {
126				aidx++
127				bidx++
128			}
129			// Store the end of the current edit chain.
130			endpoint[maxPath+diag] = aidx
131			// If we've found the end of both inputs, we're done!
132			if aidx >= alen && bidx >= blen {
133				save() // save the final path
134				break dLoop
135			}
136		}
137		save() // save the current path
138	}
139	if editDistance == 0 {
140		return nil
141	}
142	chunks := make([]Chunk, editDistance+1)
143
144	x, y := alen, blen
145	for d := editDistance; d > 0; d-- {
146		endpoint := saved[d]
147		diag := x - y
148		insert := diag == -d || (diag != d && endpoint[maxPath+diag-1] < endpoint[maxPath+diag+1])
149
150		x1 := endpoint[maxPath+diag]
151		var x0, xM, kk int
152		if insert {
153			kk = diag + 1
154			x0 = endpoint[maxPath+kk]
155			xM = x0
156		} else {
157			kk = diag - 1
158			x0 = endpoint[maxPath+kk]
159			xM = x0 + 1
160		}
161		y0 := x0 - kk
162
163		var c Chunk
164		if insert {
165			c.Added = b[y0:][:1]
166		} else {
167			c.Deleted = a[x0:][:1]
168		}
169		if xM < x1 {
170			c.Equal = a[xM:][:x1-xM]
171		}
172
173		x, y = x0, y0
174		chunks[d] = c
175	}
176	if x > 0 {
177		chunks[0].Equal = a[:x]
178	}
179	if chunks[0].empty() {
180		chunks = chunks[1:]
181	}
182	if len(chunks) == 0 {
183		return nil
184	}
185	return chunks
186}