1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package diff computes differences between text files or strings.
6package udiff
7
8import (
9 "fmt"
10 "slices"
11 "sort"
12 "strings"
13)
14
15// An Edit describes the replacement of a portion of a text file.
16type Edit struct {
17 Start, End int // byte offsets of the region to replace
18 New string // the replacement
19}
20
21func (e Edit) String() string {
22 return fmt.Sprintf("{Start:%d,End:%d,New:%q}", e.Start, e.End, e.New)
23}
24
25// Apply applies a sequence of edits to the src buffer and returns the
26// result. Edits are applied in order of start offset; edits with the
27// same start offset are applied in they order they were provided.
28//
29// Apply returns an error if any edit is out of bounds,
30// or if any pair of edits is overlapping.
31func Apply(src string, edits []Edit) (string, error) {
32 edits, size, err := validate(src, edits)
33 if err != nil {
34 return "", err
35 }
36
37 // Apply edits.
38 out := make([]byte, 0, size)
39 lastEnd := 0
40 for _, edit := range edits {
41 if lastEnd < edit.Start {
42 out = append(out, src[lastEnd:edit.Start]...)
43 }
44 out = append(out, edit.New...)
45 lastEnd = edit.End
46 }
47 out = append(out, src[lastEnd:]...)
48
49 if len(out) != size {
50 panic("wrong size")
51 }
52
53 return string(out), nil
54}
55
56// ApplyBytes is like Apply, but it accepts a byte slice.
57// The result is always a new array.
58func ApplyBytes(src []byte, edits []Edit) ([]byte, error) {
59 res, err := Apply(string(src), edits)
60 return []byte(res), err
61}
62
63// validate checks that edits are consistent with src,
64// and returns the size of the patched output.
65// It may return a different slice.
66func validate(src string, edits []Edit) ([]Edit, int, error) {
67 if !sort.IsSorted(editsSort(edits)) {
68 edits = slices.Clone(edits)
69 SortEdits(edits)
70 }
71
72 // Check validity of edits and compute final size.
73 size := len(src)
74 lastEnd := 0
75 for _, edit := range edits {
76 if !(0 <= edit.Start && edit.Start <= edit.End && edit.End <= len(src)) {
77 return nil, 0, fmt.Errorf("diff has out-of-bounds edits")
78 }
79 if edit.Start < lastEnd {
80 return nil, 0, fmt.Errorf("diff has overlapping edits")
81 }
82 size += len(edit.New) + edit.Start - edit.End
83 lastEnd = edit.End
84 }
85
86 return edits, size, nil
87}
88
89// SortEdits orders a slice of Edits by (start, end) offset.
90// This ordering puts insertions (end = start) before deletions
91// (end > start) at the same point, but uses a stable sort to preserve
92// the order of multiple insertions at the same point.
93// (Apply detects multiple deletions at the same point as an error.)
94func SortEdits(edits []Edit) {
95 sort.Stable(editsSort(edits))
96}
97
98type editsSort []Edit
99
100func (a editsSort) Len() int { return len(a) }
101func (a editsSort) Less(i, j int) bool {
102 if cmp := a[i].Start - a[j].Start; cmp != 0 {
103 return cmp < 0
104 }
105 return a[i].End < a[j].End
106}
107func (a editsSort) Swap(i, j int) { a[i], a[j] = a[j], a[i] }
108
109// lineEdits expands and merges a sequence of edits so that each
110// resulting edit replaces one or more complete lines.
111// See ApplyEdits for preconditions.
112func lineEdits(src string, edits []Edit) ([]Edit, error) {
113 edits, _, err := validate(src, edits)
114 if err != nil {
115 return nil, err
116 }
117
118 // Do all deletions begin and end at the start of a line,
119 // and all insertions end with a newline?
120 // (This is merely a fast path.)
121 for _, edit := range edits {
122 if edit.Start >= len(src) || // insertion at EOF
123 edit.Start > 0 && src[edit.Start-1] != '\n' || // not at line start
124 edit.End > 0 && src[edit.End-1] != '\n' || // not at line start
125 edit.New != "" && edit.New[len(edit.New)-1] != '\n' { // partial insert
126 goto expand // slow path
127 }
128 }
129 return edits, nil // aligned
130
131expand:
132 if len(edits) == 0 {
133 return edits, nil // no edits (unreachable due to fast path)
134 }
135 expanded := make([]Edit, 0, len(edits)) // a guess
136 prev := edits[0]
137 // TODO(adonovan): opt: start from the first misaligned edit.
138 // TODO(adonovan): opt: avoid quadratic cost of string += string.
139 for _, edit := range edits[1:] {
140 between := src[prev.End:edit.Start]
141 if !strings.Contains(between, "\n") {
142 // overlapping lines: combine with previous edit.
143 prev.New += between + edit.New
144 prev.End = edit.End
145 } else {
146 // non-overlapping lines: flush previous edit.
147 expanded = append(expanded, expandEdit(prev, src))
148 prev = edit
149 }
150 }
151 return append(expanded, expandEdit(prev, src)), nil // flush final edit
152}
153
154// expandEdit returns edit expanded to complete whole lines.
155func expandEdit(edit Edit, src string) Edit {
156 // Expand start left to start of line.
157 // (delta is the zero-based column number of start.)
158 start := edit.Start
159 if delta := start - 1 - strings.LastIndex(src[:start], "\n"); delta > 0 {
160 edit.Start -= delta
161 edit.New = src[start-delta:start] + edit.New
162 }
163
164 // Expand end right to end of line.
165 end := edit.End
166 if end > 0 && src[end-1] != '\n' ||
167 edit.New != "" && edit.New[len(edit.New)-1] != '\n' {
168 if nl := strings.IndexByte(src[end:], '\n'); nl < 0 {
169 edit.End = len(src) // extend to EOF
170 } else {
171 edit.End = end + nl + 1 // extend beyond \n
172 }
173 }
174 edit.New += src[end:edit.End]
175
176 return edit
177}