unified.go

  1// Copyright 2019 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package udiff
  6
  7import (
  8	"fmt"
  9	"log"
 10	"strings"
 11)
 12
 13// DefaultContextLines is the number of unchanged lines of surrounding
 14// context displayed by Unified. Use ToUnified to specify a different value.
 15const DefaultContextLines = 3
 16
 17// Unified returns a unified diff of the old and new strings.
 18// The old and new labels are the names of the old and new files.
 19// If the strings are equal, it returns the empty string.
 20func Unified(oldLabel, newLabel, old, new string) string {
 21	edits := Strings(old, new)
 22	unified, err := ToUnified(oldLabel, newLabel, old, edits, DefaultContextLines)
 23	if err != nil {
 24		// Can't happen: edits are consistent.
 25		log.Fatalf("internal error in diff.Unified: %v", err)
 26	}
 27	return unified
 28}
 29
 30// ToUnified applies the edits to content and returns a unified diff,
 31// with contextLines lines of (unchanged) context around each diff hunk.
 32// The old and new labels are the names of the content and result files.
 33// It returns an error if the edits are inconsistent; see ApplyEdits.
 34func ToUnified(oldLabel, newLabel, content string, edits []Edit, contextLines int) (string, error) {
 35	u, err := toUnified(oldLabel, newLabel, content, edits, contextLines)
 36	if err != nil {
 37		return "", err
 38	}
 39	return u.String(), nil
 40}
 41
 42// unified represents a set of edits as a unified diff.
 43type unified struct {
 44	// From is the name of the original file.
 45	From string
 46	// To is the name of the modified file.
 47	To string
 48	// Hunks is the set of edit Hunks needed to transform the file content.
 49	Hunks []*hunk
 50}
 51
 52// Hunk represents a contiguous set of line edits to apply.
 53type hunk struct {
 54	// The line in the original source where the hunk starts.
 55	FromLine int
 56	// The line in the original source where the hunk finishes.
 57	ToLine int
 58	// The set of line based edits to apply.
 59	Lines []line
 60}
 61
 62// Line represents a single line operation to apply as part of a Hunk.
 63type line struct {
 64	// Kind is the type of line this represents, deletion, insertion or copy.
 65	Kind OpKind
 66	// Content is the Content of this line.
 67	// For deletion it is the line being removed, for all others it is the line
 68	// to put in the output.
 69	Content string
 70}
 71
 72// OpKind is used to denote the type of operation a line represents.
 73type OpKind int
 74
 75const (
 76	// Delete is the operation kind for a line that is present in the input
 77	// but not in the output.
 78	Delete OpKind = iota
 79	// Insert is the operation kind for a line that is new in the output.
 80	Insert
 81	// Equal is the operation kind for a line that is the same in the input and
 82	// output, often used to provide context around edited lines.
 83	Equal
 84)
 85
 86// String returns a human readable representation of an OpKind. It is not
 87// intended for machine processing.
 88func (k OpKind) String() string {
 89	switch k {
 90	case Delete:
 91		return "delete"
 92	case Insert:
 93		return "insert"
 94	case Equal:
 95		return "equal"
 96	default:
 97		panic("unknown operation kind")
 98	}
 99}
100
101// toUnified takes a file contents and a sequence of edits, and calculates
102// a unified diff that represents those edits.
103func toUnified(fromName, toName string, content string, edits []Edit, contextLines int) (unified, error) {
104	gap := contextLines * 2
105	u := unified{
106		From: fromName,
107		To:   toName,
108	}
109	if len(edits) == 0 {
110		return u, nil
111	}
112	var err error
113	edits, err = lineEdits(content, edits) // expand to whole lines
114	if err != nil {
115		return u, err
116	}
117	lines := splitLines(content)
118	var h *hunk
119	last := 0
120	toLine := 0
121	for _, edit := range edits {
122		// Compute the zero-based line numbers of the edit start and end.
123		// TODO(adonovan): opt: compute incrementally, avoid O(n^2).
124		start := strings.Count(content[:edit.Start], "\n")
125		end := strings.Count(content[:edit.End], "\n")
126		if edit.End == len(content) && len(content) > 0 && content[len(content)-1] != '\n' {
127			end++ // EOF counts as an implicit newline
128		}
129
130		switch {
131		case h != nil && start == last:
132			// direct extension
133		case h != nil && start <= last+gap:
134			// within range of previous lines, add the joiners
135			addEqualLines(h, lines, last, start)
136		default:
137			// need to start a new hunk
138			if h != nil {
139				// add the edge to the previous hunk
140				addEqualLines(h, lines, last, last+contextLines)
141				u.Hunks = append(u.Hunks, h)
142			}
143			toLine += start - last
144			h = &hunk{
145				FromLine: start + 1,
146				ToLine:   toLine + 1,
147			}
148			// add the edge to the new hunk
149			delta := addEqualLines(h, lines, start-contextLines, start)
150			h.FromLine -= delta
151			h.ToLine -= delta
152		}
153		last = start
154		for i := start; i < end; i++ {
155			h.Lines = append(h.Lines, line{Kind: Delete, Content: lines[i]})
156			last++
157		}
158		if edit.New != "" {
159			for _, content := range splitLines(edit.New) {
160				h.Lines = append(h.Lines, line{Kind: Insert, Content: content})
161				toLine++
162			}
163		}
164	}
165	if h != nil {
166		// add the edge to the final hunk
167		addEqualLines(h, lines, last, last+contextLines)
168		u.Hunks = append(u.Hunks, h)
169	}
170	return u, nil
171}
172
173func splitLines(text string) []string {
174	lines := strings.SplitAfter(text, "\n")
175	if lines[len(lines)-1] == "" {
176		lines = lines[:len(lines)-1]
177	}
178	return lines
179}
180
181func addEqualLines(h *hunk, lines []string, start, end int) int {
182	delta := 0
183	for i := start; i < end; i++ {
184		if i < 0 {
185			continue
186		}
187		if i >= len(lines) {
188			return delta
189		}
190		h.Lines = append(h.Lines, line{Kind: Equal, Content: lines[i]})
191		delta++
192	}
193	return delta
194}
195
196// String converts a unified diff to the standard textual form for that diff.
197// The output of this function can be passed to tools like patch.
198func (u unified) String() string {
199	if len(u.Hunks) == 0 {
200		return ""
201	}
202	b := new(strings.Builder)
203	fmt.Fprintf(b, "--- %s\n", u.From)
204	fmt.Fprintf(b, "+++ %s\n", u.To)
205	for _, hunk := range u.Hunks {
206		fromCount, toCount := 0, 0
207		for _, l := range hunk.Lines {
208			switch l.Kind {
209			case Delete:
210				fromCount++
211			case Insert:
212				toCount++
213			default:
214				fromCount++
215				toCount++
216			}
217		}
218		fmt.Fprint(b, "@@")
219		if fromCount > 1 {
220			fmt.Fprintf(b, " -%d,%d", hunk.FromLine, fromCount)
221		} else if hunk.FromLine == 1 && fromCount == 0 {
222			// Match odd GNU diff -u behavior adding to empty file.
223			fmt.Fprintf(b, " -0,0")
224		} else {
225			fmt.Fprintf(b, " -%d", hunk.FromLine)
226		}
227		if toCount > 1 {
228			fmt.Fprintf(b, " +%d,%d", hunk.ToLine, toCount)
229		} else if hunk.ToLine == 1 && toCount == 0 {
230			// Match odd GNU diff -u behavior adding to empty file.
231			fmt.Fprintf(b, " +0,0")
232		} else {
233			fmt.Fprintf(b, " +%d", hunk.ToLine)
234		}
235		fmt.Fprint(b, " @@\n")
236		for _, l := range hunk.Lines {
237			switch l.Kind {
238			case Delete:
239				fmt.Fprintf(b, "-%s", l.Content)
240			case Insert:
241				fmt.Fprintf(b, "+%s", l.Content)
242			default:
243				fmt.Fprintf(b, " %s", l.Content)
244			}
245			if !strings.HasSuffix(l.Content, "\n") {
246				fmt.Fprintf(b, "\n\\ No newline at end of file\n")
247			}
248		}
249	}
250	return b.String()
251}