1// Copyright 2019, The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package cmp
6
7import (
8 "bytes"
9 "fmt"
10 "math"
11 "reflect"
12 "strconv"
13 "strings"
14 "unicode"
15 "unicode/utf8"
16
17 "github.com/google/go-cmp/cmp/internal/diff"
18)
19
20// CanFormatDiffSlice reports whether we support custom formatting for nodes
21// that are slices of primitive kinds or strings.
22func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
23 switch {
24 case opts.DiffMode != diffUnknown:
25 return false // Must be formatting in diff mode
26 case v.NumDiff == 0:
27 return false // No differences detected
28 case !v.ValueX.IsValid() || !v.ValueY.IsValid():
29 return false // Both values must be valid
30 case v.NumIgnored > 0:
31 return false // Some ignore option was used
32 case v.NumTransformed > 0:
33 return false // Some transform option was used
34 case v.NumCompared > 1:
35 return false // More than one comparison was used
36 case v.NumCompared == 1 && v.Type.Name() != "":
37 // The need for cmp to check applicability of options on every element
38 // in a slice is a significant performance detriment for large []byte.
39 // The workaround is to specify Comparer(bytes.Equal),
40 // which enables cmp to compare []byte more efficiently.
41 // If they differ, we still want to provide batched diffing.
42 // The logic disallows named types since they tend to have their own
43 // String method, with nicer formatting than what this provides.
44 return false
45 }
46
47 // Check whether this is an interface with the same concrete types.
48 t := v.Type
49 vx, vy := v.ValueX, v.ValueY
50 if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {
51 vx, vy = vx.Elem(), vy.Elem()
52 t = vx.Type()
53 }
54
55 // Check whether we provide specialized diffing for this type.
56 switch t.Kind() {
57 case reflect.String:
58 case reflect.Array, reflect.Slice:
59 // Only slices of primitive types have specialized handling.
60 switch t.Elem().Kind() {
61 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
62 reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
63 reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
64 default:
65 return false
66 }
67
68 // Both slice values have to be non-empty.
69 if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {
70 return false
71 }
72
73 // If a sufficient number of elements already differ,
74 // use specialized formatting even if length requirement is not met.
75 if v.NumDiff > v.NumSame {
76 return true
77 }
78 default:
79 return false
80 }
81
82 // Use specialized string diffing for longer slices or strings.
83 const minLength = 32
84 return vx.Len() >= minLength && vy.Len() >= minLength
85}
86
87// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
88// This provides custom-tailored logic to make printing of differences in
89// textual strings and slices of primitive kinds more readable.
90func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
91 assert(opts.DiffMode == diffUnknown)
92 t, vx, vy := v.Type, v.ValueX, v.ValueY
93 if t.Kind() == reflect.Interface {
94 vx, vy = vx.Elem(), vy.Elem()
95 t = vx.Type()
96 opts = opts.WithTypeMode(emitType)
97 }
98
99 // Auto-detect the type of the data.
100 var sx, sy string
101 var ssx, ssy []string
102 var isString, isMostlyText, isPureLinedText, isBinary bool
103 switch {
104 case t.Kind() == reflect.String:
105 sx, sy = vx.String(), vy.String()
106 isString = true
107 case t.Kind() == reflect.Slice && t.Elem() == byteType:
108 sx, sy = string(vx.Bytes()), string(vy.Bytes())
109 isString = true
110 case t.Kind() == reflect.Array:
111 // Arrays need to be addressable for slice operations to work.
112 vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
113 vx2.Set(vx)
114 vy2.Set(vy)
115 vx, vy = vx2, vy2
116 }
117 if isString {
118 var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int
119 for i, r := range sx + sy {
120 numTotalRunes++
121 if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError {
122 numValidRunes++
123 }
124 if r == '\n' {
125 if maxLineLen < i-lastLineIdx {
126 maxLineLen = i - lastLineIdx
127 }
128 lastLineIdx = i + 1
129 numLines++
130 }
131 }
132 isPureText := numValidRunes == numTotalRunes
133 isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))
134 isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
135 isBinary = !isMostlyText
136
137 // Avoid diffing by lines if it produces a significantly more complex
138 // edit script than diffing by bytes.
139 if isPureLinedText {
140 ssx = strings.Split(sx, "\n")
141 ssy = strings.Split(sy, "\n")
142 esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
143 return diff.BoolResult(ssx[ix] == ssy[iy])
144 })
145 esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
146 return diff.BoolResult(sx[ix] == sy[iy])
147 })
148 efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
149 efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
150 quotedLength := len(strconv.Quote(sx + sy))
151 unquotedLength := len(sx) + len(sy)
152 escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength)
153 isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1
154 }
155 }
156
157 // Format the string into printable records.
158 var list textList
159 var delim string
160 switch {
161 // If the text appears to be multi-lined text,
162 // then perform differencing across individual lines.
163 case isPureLinedText:
164 list = opts.formatDiffSlice(
165 reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
166 func(v reflect.Value, d diffMode) textRecord {
167 s := formatString(v.Index(0).String())
168 return textRecord{Diff: d, Value: textLine(s)}
169 },
170 )
171 delim = "\n"
172
173 // If possible, use a custom triple-quote (""") syntax for printing
174 // differences in a string literal. This format is more readable,
175 // but has edge-cases where differences are visually indistinguishable.
176 // This format is avoided under the following conditions:
177 // - A line starts with `"""`
178 // - A line starts with "..."
179 // - A line contains non-printable characters
180 // - Adjacent different lines differ only by whitespace
181 //
182 // For example:
183 //
184 // """
185 // ... // 3 identical lines
186 // foo
187 // bar
188 // - baz
189 // + BAZ
190 // """
191 isTripleQuoted := true
192 prevRemoveLines := map[string]bool{}
193 prevInsertLines := map[string]bool{}
194 var list2 textList
195 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
196 for _, r := range list {
197 if !r.Value.Equal(textEllipsis) {
198 line, _ := strconv.Unquote(string(r.Value.(textLine)))
199 line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
200 normLine := strings.Map(func(r rune) rune {
201 if unicode.IsSpace(r) {
202 return -1 // drop whitespace to avoid visually indistinguishable output
203 }
204 return r
205 }, line)
206 isPrintable := func(r rune) bool {
207 return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
208 }
209 isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
210 switch r.Diff {
211 case diffRemoved:
212 isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
213 prevRemoveLines[normLine] = true
214 case diffInserted:
215 isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
216 prevInsertLines[normLine] = true
217 }
218 if !isTripleQuoted {
219 break
220 }
221 r.Value = textLine(line)
222 r.ElideComma = true
223 }
224 if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
225 prevRemoveLines = map[string]bool{}
226 prevInsertLines = map[string]bool{}
227 }
228 list2 = append(list2, r)
229 }
230 if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
231 list2 = list2[:len(list2)-1] // elide single empty line at the end
232 }
233 list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
234 if isTripleQuoted {
235 var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
236 switch t.Kind() {
237 case reflect.String:
238 if t != stringType {
239 out = opts.FormatType(t, out)
240 }
241 case reflect.Slice:
242 // Always emit type for slices since the triple-quote syntax
243 // looks like a string (not a slice).
244 opts = opts.WithTypeMode(emitType)
245 out = opts.FormatType(t, out)
246 }
247 return out
248 }
249
250 // If the text appears to be single-lined text,
251 // then perform differencing in approximately fixed-sized chunks.
252 // The output is printed as quoted strings.
253 case isMostlyText:
254 list = opts.formatDiffSlice(
255 reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
256 func(v reflect.Value, d diffMode) textRecord {
257 s := formatString(v.String())
258 return textRecord{Diff: d, Value: textLine(s)}
259 },
260 )
261
262 // If the text appears to be binary data,
263 // then perform differencing in approximately fixed-sized chunks.
264 // The output is inspired by hexdump.
265 case isBinary:
266 list = opts.formatDiffSlice(
267 reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
268 func(v reflect.Value, d diffMode) textRecord {
269 var ss []string
270 for i := 0; i < v.Len(); i++ {
271 ss = append(ss, formatHex(v.Index(i).Uint()))
272 }
273 s := strings.Join(ss, ", ")
274 comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
275 return textRecord{Diff: d, Value: textLine(s), Comment: comment}
276 },
277 )
278
279 // For all other slices of primitive types,
280 // then perform differencing in approximately fixed-sized chunks.
281 // The size of each chunk depends on the width of the element kind.
282 default:
283 var chunkSize int
284 if t.Elem().Kind() == reflect.Bool {
285 chunkSize = 16
286 } else {
287 switch t.Elem().Bits() {
288 case 8:
289 chunkSize = 16
290 case 16:
291 chunkSize = 12
292 case 32:
293 chunkSize = 8
294 default:
295 chunkSize = 8
296 }
297 }
298 list = opts.formatDiffSlice(
299 vx, vy, chunkSize, t.Elem().Kind().String(),
300 func(v reflect.Value, d diffMode) textRecord {
301 var ss []string
302 for i := 0; i < v.Len(); i++ {
303 switch t.Elem().Kind() {
304 case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
305 ss = append(ss, fmt.Sprint(v.Index(i).Int()))
306 case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
307 ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
308 case reflect.Uint8, reflect.Uintptr:
309 ss = append(ss, formatHex(v.Index(i).Uint()))
310 case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
311 ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
312 }
313 }
314 s := strings.Join(ss, ", ")
315 return textRecord{Diff: d, Value: textLine(s)}
316 },
317 )
318 }
319
320 // Wrap the output with appropriate type information.
321 var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
322 if !isMostlyText {
323 // The "{...}" byte-sequence literal is not valid Go syntax for strings.
324 // Emit the type for extra clarity (e.g. "string{...}").
325 if t.Kind() == reflect.String {
326 opts = opts.WithTypeMode(emitType)
327 }
328 return opts.FormatType(t, out)
329 }
330 switch t.Kind() {
331 case reflect.String:
332 out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
333 if t != stringType {
334 out = opts.FormatType(t, out)
335 }
336 case reflect.Slice:
337 out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
338 if t != bytesType {
339 out = opts.FormatType(t, out)
340 }
341 }
342 return out
343}
344
345// formatASCII formats s as an ASCII string.
346// This is useful for printing binary strings in a semi-legible way.
347func formatASCII(s string) string {
348 b := bytes.Repeat([]byte{'.'}, len(s))
349 for i := 0; i < len(s); i++ {
350 if ' ' <= s[i] && s[i] <= '~' {
351 b[i] = s[i]
352 }
353 }
354 return string(b)
355}
356
357func (opts formatOptions) formatDiffSlice(
358 vx, vy reflect.Value, chunkSize int, name string,
359 makeRec func(reflect.Value, diffMode) textRecord,
360) (list textList) {
361 eq := func(ix, iy int) bool {
362 return vx.Index(ix).Interface() == vy.Index(iy).Interface()
363 }
364 es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
365 return diff.BoolResult(eq(ix, iy))
366 })
367
368 appendChunks := func(v reflect.Value, d diffMode) int {
369 n0 := v.Len()
370 for v.Len() > 0 {
371 n := chunkSize
372 if n > v.Len() {
373 n = v.Len()
374 }
375 list = append(list, makeRec(v.Slice(0, n), d))
376 v = v.Slice(n, v.Len())
377 }
378 return n0 - v.Len()
379 }
380
381 var numDiffs int
382 maxLen := -1
383 if opts.LimitVerbosity {
384 maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
385 opts.VerbosityLevel--
386 }
387
388 groups := coalesceAdjacentEdits(name, es)
389 groups = coalesceInterveningIdentical(groups, chunkSize/4)
390 groups = cleanupSurroundingIdentical(groups, eq)
391 maxGroup := diffStats{Name: name}
392 for i, ds := range groups {
393 if maxLen >= 0 && numDiffs >= maxLen {
394 maxGroup = maxGroup.Append(ds)
395 continue
396 }
397
398 // Print equal.
399 if ds.NumDiff() == 0 {
400 // Compute the number of leading and trailing equal bytes to print.
401 var numLo, numHi int
402 numEqual := ds.NumIgnored + ds.NumIdentical
403 for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
404 numLo++
405 }
406 for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
407 numHi++
408 }
409 if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
410 numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
411 }
412
413 // Print the equal bytes.
414 appendChunks(vx.Slice(0, numLo), diffIdentical)
415 if numEqual > numLo+numHi {
416 ds.NumIdentical -= numLo + numHi
417 list.AppendEllipsis(ds)
418 }
419 appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
420 vx = vx.Slice(numEqual, vx.Len())
421 vy = vy.Slice(numEqual, vy.Len())
422 continue
423 }
424
425 // Print unequal.
426 len0 := len(list)
427 nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
428 vx = vx.Slice(nx, vx.Len())
429 ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
430 vy = vy.Slice(ny, vy.Len())
431 numDiffs += len(list) - len0
432 }
433 if maxGroup.IsZero() {
434 assert(vx.Len() == 0 && vy.Len() == 0)
435 } else {
436 list.AppendEllipsis(maxGroup)
437 }
438 return list
439}
440
441// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
442// equal or unequal counts.
443//
444// Example:
445//
446// Input: "..XXY...Y"
447// Output: [
448// {NumIdentical: 2},
449// {NumRemoved: 2, NumInserted 1},
450// {NumIdentical: 3},
451// {NumInserted: 1},
452// ]
453func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
454 var prevMode byte
455 lastStats := func(mode byte) *diffStats {
456 if prevMode != mode {
457 groups = append(groups, diffStats{Name: name})
458 prevMode = mode
459 }
460 return &groups[len(groups)-1]
461 }
462 for _, e := range es {
463 switch e {
464 case diff.Identity:
465 lastStats('=').NumIdentical++
466 case diff.UniqueX:
467 lastStats('!').NumRemoved++
468 case diff.UniqueY:
469 lastStats('!').NumInserted++
470 case diff.Modified:
471 lastStats('!').NumModified++
472 }
473 }
474 return groups
475}
476
477// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
478// equal groups into adjacent unequal groups that currently result in a
479// dual inserted/removed printout. This acts as a high-pass filter to smooth
480// out high-frequency changes within the windowSize.
481//
482// Example:
483//
484// WindowSize: 16,
485// Input: [
486// {NumIdentical: 61}, // group 0
487// {NumRemoved: 3, NumInserted: 1}, // group 1
488// {NumIdentical: 6}, // ├── coalesce
489// {NumInserted: 2}, // ├── coalesce
490// {NumIdentical: 1}, // ├── coalesce
491// {NumRemoved: 9}, // └── coalesce
492// {NumIdentical: 64}, // group 2
493// {NumRemoved: 3, NumInserted: 1}, // group 3
494// {NumIdentical: 6}, // ├── coalesce
495// {NumInserted: 2}, // ├── coalesce
496// {NumIdentical: 1}, // ├── coalesce
497// {NumRemoved: 7}, // ├── coalesce
498// {NumIdentical: 1}, // ├── coalesce
499// {NumRemoved: 2}, // └── coalesce
500// {NumIdentical: 63}, // group 4
501// ]
502// Output: [
503// {NumIdentical: 61},
504// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3},
505// {NumIdentical: 64},
506// {NumIdentical: 8, NumRemoved: 12, NumInserted: 3},
507// {NumIdentical: 63},
508// ]
509func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
510 groups, groupsOrig := groups[:0], groups
511 for i, ds := range groupsOrig {
512 if len(groups) >= 2 && ds.NumDiff() > 0 {
513 prev := &groups[len(groups)-2] // Unequal group
514 curr := &groups[len(groups)-1] // Equal group
515 next := &groupsOrig[i] // Unequal group
516 hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
517 hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
518 if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
519 *prev = prev.Append(*curr).Append(*next)
520 groups = groups[:len(groups)-1] // Truncate off equal group
521 continue
522 }
523 }
524 groups = append(groups, ds)
525 }
526 return groups
527}
528
529// cleanupSurroundingIdentical scans through all unequal groups, and
530// moves any leading sequence of equal elements to the preceding equal group and
531// moves and trailing sequence of equal elements to the succeeding equal group.
532//
533// This is necessary since coalesceInterveningIdentical may coalesce edit groups
534// together such that leading/trailing spans of equal elements becomes possible.
535// Note that this can occur even with an optimal diffing algorithm.
536//
537// Example:
538//
539// Input: [
540// {NumIdentical: 61},
541// {NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements
542// {NumIdentical: 67},
543// {NumIdentical: 7, NumRemoved: 12, NumInserted: 3}, // assume 10 trailing identical elements
544// {NumIdentical: 54},
545// ]
546// Output: [
547// {NumIdentical: 64}, // incremented by 3
548// {NumRemoved: 9},
549// {NumIdentical: 67},
550// {NumRemoved: 9},
551// {NumIdentical: 64}, // incremented by 10
552// ]
553func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {
554 var ix, iy int // indexes into sequence x and y
555 for i, ds := range groups {
556 // Handle equal group.
557 if ds.NumDiff() == 0 {
558 ix += ds.NumIdentical
559 iy += ds.NumIdentical
560 continue
561 }
562
563 // Handle unequal group.
564 nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified
565 ny := ds.NumIdentical + ds.NumInserted + ds.NumModified
566 var numLeadingIdentical, numTrailingIdentical int
567 for j := 0; j < nx && j < ny && eq(ix+j, iy+j); j++ {
568 numLeadingIdentical++
569 }
570 for j := 0; j < nx && j < ny && eq(ix+nx-1-j, iy+ny-1-j); j++ {
571 numTrailingIdentical++
572 }
573 if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {
574 if numLeadingIdentical > 0 {
575 // Remove leading identical span from this group and
576 // insert it into the preceding group.
577 if i-1 >= 0 {
578 groups[i-1].NumIdentical += numLeadingIdentical
579 } else {
580 // No preceding group exists, so prepend a new group,
581 // but do so after we finish iterating over all groups.
582 defer func() {
583 groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)
584 }()
585 }
586 // Increment indexes since the preceding group would have handled this.
587 ix += numLeadingIdentical
588 iy += numLeadingIdentical
589 }
590 if numTrailingIdentical > 0 {
591 // Remove trailing identical span from this group and
592 // insert it into the succeeding group.
593 if i+1 < len(groups) {
594 groups[i+1].NumIdentical += numTrailingIdentical
595 } else {
596 // No succeeding group exists, so append a new group,
597 // but do so after we finish iterating over all groups.
598 defer func() {
599 groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})
600 }()
601 }
602 // Do not increment indexes since the succeeding group will handle this.
603 }
604
605 // Update this group since some identical elements were removed.
606 nx -= numIdentical
607 ny -= numIdentical
608 groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}
609 }
610 ix += nx
611 iy += ny
612 }
613 return groups
614}