diff.go

  1// Copyright 2019 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package diff computes differences between text files or strings.
  6package udiff
  7
  8import (
  9	"fmt"
 10	"slices"
 11	"sort"
 12	"strings"
 13)
 14
 15// An Edit describes the replacement of a portion of a text file.
 16type Edit struct {
 17	Start, End int    // byte offsets of the region to replace
 18	New        string // the replacement
 19}
 20
 21func (e Edit) String() string {
 22	return fmt.Sprintf("{Start:%d,End:%d,New:%q}", e.Start, e.End, e.New)
 23}
 24
 25// Apply applies a sequence of edits to the src buffer and returns the
 26// result. Edits are applied in order of start offset; edits with the
 27// same start offset are applied in they order they were provided.
 28//
 29// Apply returns an error if any edit is out of bounds,
 30// or if any pair of edits is overlapping.
 31func Apply(src string, edits []Edit) (string, error) {
 32	edits, size, err := validate(src, edits)
 33	if err != nil {
 34		return "", err
 35	}
 36
 37	// Apply edits.
 38	out := make([]byte, 0, size)
 39	lastEnd := 0
 40	for _, edit := range edits {
 41		if lastEnd < edit.Start {
 42			out = append(out, src[lastEnd:edit.Start]...)
 43		}
 44		out = append(out, edit.New...)
 45		lastEnd = edit.End
 46	}
 47	out = append(out, src[lastEnd:]...)
 48
 49	if len(out) != size {
 50		panic("wrong size")
 51	}
 52
 53	return string(out), nil
 54}
 55
 56// ApplyBytes is like Apply, but it accepts a byte slice.
 57// The result is always a new array.
 58func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
 59	res, err := Apply(string(src), edits)
 60	return []byte(res), err
 61}
 62
 63// validate checks that edits are consistent with src,
 64// and returns the size of the patched output.
 65// It may return a different slice.
 66func validate(src string, edits []Edit) ([]Edit, int, error) {
 67	if !sort.IsSorted(editsSort(edits)) {
 68		edits = slices.Clone(edits)
 69		SortEdits(edits)
 70	}
 71
 72	// Check validity of edits and compute final size.
 73	size := len(src)
 74	lastEnd := 0
 75	for _, edit := range edits {
 76		if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
 77			return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
 78		}
 79		if edit.Start < lastEnd {
 80			return nil, 0, fmt.Errorf("diff has overlapping edits")
 81		}
 82		size += len(edit.New) + edit.Start - edit.End
 83		lastEnd = edit.End
 84	}
 85
 86	return edits, size, nil
 87}
 88
 89// SortEdits orders a slice of Edits by (start, end) offset.
 90// This ordering puts insertions (end = start) before deletions
 91// (end > start) at the same point, but uses a stable sort to preserve
 92// the order of multiple insertions at the same point.
 93// (Apply detects multiple deletions at the same point as an error.)
 94func SortEdits(edits []Edit) {
 95	sort.Stable(editsSort(edits))
 96}
 97
 98type editsSort []Edit
 99
100func (a editsSort) Len() int { return len(a) }
101func (a editsSort) Less(i, j int) bool {
102	if cmp := a[i].Start - a[j].Start; cmp != 0 {
103		return cmp < 0
104	}
105	return a[i].End < a[j].End
106}
107func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
108
109// lineEdits expands and merges a sequence of edits so that each
110// resulting edit replaces one or more complete lines.
111// See ApplyEdits for preconditions.
112func lineEdits(src string, edits []Edit) ([]Edit, error) {
113	edits, _, err := validate(src, edits)
114	if err != nil {
115		return nil, err
116	}
117
118	// Do all deletions begin and end at the start of a line,
119	// and all insertions end with a newline?
120	// (This is merely a fast path.)
121	for _, edit := range edits {
122		if edit.Start >= len(src) || // insertion at EOF
123			edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start
124			edit.End > 0 && src[edit.End-1] != '\n' || // not at line start
125			edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert
126			goto expand // slow path
127		}
128	}
129	return edits, nil // aligned
130
131expand:
132	if len(edits) == 0 {
133		return edits, nil // no edits (unreachable due to fast path)
134	}
135	expanded := make([]Edit, 0, len(edits)) // a guess
136	prev := edits[0]
137	// TODO(adonovan): opt: start from the first misaligned edit.
138	// TODO(adonovan): opt: avoid quadratic cost of string += string.
139	for _, edit := range edits[1:] {
140		between := src[prev.End:edit.Start]
141		if !strings.Contains(between, "\n") {
142			// overlapping lines: combine with previous edit.
143			prev.New += between + edit.New
144			prev.End = edit.End
145		} else {
146			// non-overlapping lines: flush previous edit.
147			expanded = append(expanded, expandEdit(prev, src))
148			prev = edit
149		}
150	}
151	return append(expanded, expandEdit(prev, src)), nil // flush final edit
152}
153
154// expandEdit returns edit expanded to complete whole lines.
155func expandEdit(edit Edit, src string) Edit {
156	// Expand start left to start of line.
157	// (delta is the zero-based column number of start.)
158	start := edit.Start
159	if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
160		edit.Start -= delta
161		edit.New = src[start-delta:start] + edit.New
162	}
163
164	// Expand end right to end of line.
165	end := edit.End
166	if end > 0 && src[end-1] != '\n' ||
167		edit.New != "" && edit.New[len(edit.New)-1] != '\n' {
168		if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
169			edit.End = len(src) // extend to EOF
170		} else {
171			edit.End = end + nl + 1 // extend beyond \n
172		}
173	}
174	edit.New += src[end:edit.End]
175
176	return edit
177}