report_slices.go

  1// Copyright 2019, The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package cmp
  6
  7import (
  8	"bytes"
  9	"fmt"
 10	"math"
 11	"reflect"
 12	"strconv"
 13	"strings"
 14	"unicode"
 15	"unicode/utf8"
 16
 17	"github.com/google/go-cmp/cmp/internal/diff"
 18)
 19
 20// CanFormatDiffSlice reports whether we support custom formatting for nodes
 21// that are slices of primitive kinds or strings.
 22func (opts formatOptions) CanFormatDiffSlice(v *valueNode) bool {
 23	switch {
 24	case opts.DiffMode != diffUnknown:
 25		return false // Must be formatting in diff mode
 26	case v.NumDiff == 0:
 27		return false // No differences detected
 28	case !v.ValueX.IsValid() || !v.ValueY.IsValid():
 29		return false // Both values must be valid
 30	case v.NumIgnored > 0:
 31		return false // Some ignore option was used
 32	case v.NumTransformed > 0:
 33		return false // Some transform option was used
 34	case v.NumCompared > 1:
 35		return false // More than one comparison was used
 36	case v.NumCompared == 1 && v.Type.Name() != "":
 37		// The need for cmp to check applicability of options on every element
 38		// in a slice is a significant performance detriment for large []byte.
 39		// The workaround is to specify Comparer(bytes.Equal),
 40		// which enables cmp to compare []byte more efficiently.
 41		// If they differ, we still want to provide batched diffing.
 42		// The logic disallows named types since they tend to have their own
 43		// String method, with nicer formatting than what this provides.
 44		return false
 45	}
 46
 47	// Check whether this is an interface with the same concrete types.
 48	t := v.Type
 49	vx, vy := v.ValueX, v.ValueY
 50	if t.Kind() == reflect.Interface && !vx.IsNil() && !vy.IsNil() && vx.Elem().Type() == vy.Elem().Type() {
 51		vx, vy = vx.Elem(), vy.Elem()
 52		t = vx.Type()
 53	}
 54
 55	// Check whether we provide specialized diffing for this type.
 56	switch t.Kind() {
 57	case reflect.String:
 58	case reflect.Array, reflect.Slice:
 59		// Only slices of primitive types have specialized handling.
 60		switch t.Elem().Kind() {
 61		case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64,
 62			reflect.Uint, reflect.Uint8, reflect.Uint16, reflect.Uint32, reflect.Uint64, reflect.Uintptr,
 63			reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
 64		default:
 65			return false
 66		}
 67
 68		// Both slice values have to be non-empty.
 69		if t.Kind() == reflect.Slice && (vx.Len() == 0 || vy.Len() == 0) {
 70			return false
 71		}
 72
 73		// If a sufficient number of elements already differ,
 74		// use specialized formatting even if length requirement is not met.
 75		if v.NumDiff > v.NumSame {
 76			return true
 77		}
 78	default:
 79		return false
 80	}
 81
 82	// Use specialized string diffing for longer slices or strings.
 83	const minLength = 32
 84	return vx.Len() >= minLength && vy.Len() >= minLength
 85}
 86
 87// FormatDiffSlice prints a diff for the slices (or strings) represented by v.
 88// This provides custom-tailored logic to make printing of differences in
 89// textual strings and slices of primitive kinds more readable.
 90func (opts formatOptions) FormatDiffSlice(v *valueNode) textNode {
 91	assert(opts.DiffMode == diffUnknown)
 92	t, vx, vy := v.Type, v.ValueX, v.ValueY
 93	if t.Kind() == reflect.Interface {
 94		vx, vy = vx.Elem(), vy.Elem()
 95		t = vx.Type()
 96		opts = opts.WithTypeMode(emitType)
 97	}
 98
 99	// Auto-detect the type of the data.
100	var sx, sy string
101	var ssx, ssy []string
102	var isString, isMostlyText, isPureLinedText, isBinary bool
103	switch {
104	case t.Kind() == reflect.String:
105		sx, sy = vx.String(), vy.String()
106		isString = true
107	case t.Kind() == reflect.Slice && t.Elem() == byteType:
108		sx, sy = string(vx.Bytes()), string(vy.Bytes())
109		isString = true
110	case t.Kind() == reflect.Array:
111		// Arrays need to be addressable for slice operations to work.
112		vx2, vy2 := reflect.New(t).Elem(), reflect.New(t).Elem()
113		vx2.Set(vx)
114		vy2.Set(vy)
115		vx, vy = vx2, vy2
116	}
117	if isString {
118		var numTotalRunes, numValidRunes, numLines, lastLineIdx, maxLineLen int
119		for i, r := range sx + sy {
120			numTotalRunes++
121			if (unicode.IsPrint(r) || unicode.IsSpace(r)) && r != utf8.RuneError {
122				numValidRunes++
123			}
124			if r == '\n' {
125				if maxLineLen < i-lastLineIdx {
126					maxLineLen = i - lastLineIdx
127				}
128				lastLineIdx = i + 1
129				numLines++
130			}
131		}
132		isPureText := numValidRunes == numTotalRunes
133		isMostlyText = float64(numValidRunes) > math.Floor(0.90*float64(numTotalRunes))
134		isPureLinedText = isPureText && numLines >= 4 && maxLineLen <= 1024
135		isBinary = !isMostlyText
136
137		// Avoid diffing by lines if it produces a significantly more complex
138		// edit script than diffing by bytes.
139		if isPureLinedText {
140			ssx = strings.Split(sx, "\n")
141			ssy = strings.Split(sy, "\n")
142			esLines := diff.Difference(len(ssx), len(ssy), func(ix, iy int) diff.Result {
143				return diff.BoolResult(ssx[ix] == ssy[iy])
144			})
145			esBytes := diff.Difference(len(sx), len(sy), func(ix, iy int) diff.Result {
146				return diff.BoolResult(sx[ix] == sy[iy])
147			})
148			efficiencyLines := float64(esLines.Dist()) / float64(len(esLines))
149			efficiencyBytes := float64(esBytes.Dist()) / float64(len(esBytes))
150			quotedLength := len(strconv.Quote(sx + sy))
151			unquotedLength := len(sx) + len(sy)
152			escapeExpansionRatio := float64(quotedLength) / float64(unquotedLength)
153			isPureLinedText = efficiencyLines < 4*efficiencyBytes || escapeExpansionRatio > 1.1
154		}
155	}
156
157	// Format the string into printable records.
158	var list textList
159	var delim string
160	switch {
161	// If the text appears to be multi-lined text,
162	// then perform differencing across individual lines.
163	case isPureLinedText:
164		list = opts.formatDiffSlice(
165			reflect.ValueOf(ssx), reflect.ValueOf(ssy), 1, "line",
166			func(v reflect.Value, d diffMode) textRecord {
167				s := formatString(v.Index(0).String())
168				return textRecord{Diff: d, Value: textLine(s)}
169			},
170		)
171		delim = "\n"
172
173		// If possible, use a custom triple-quote (""") syntax for printing
174		// differences in a string literal. This format is more readable,
175		// but has edge-cases where differences are visually indistinguishable.
176		// This format is avoided under the following conditions:
177		//   - A line starts with `"""`
178		//   - A line starts with "..."
179		//   - A line contains non-printable characters
180		//   - Adjacent different lines differ only by whitespace
181		//
182		// For example:
183		//
184		//		"""
185		//		... // 3 identical lines
186		//		foo
187		//		bar
188		//	-	baz
189		//	+	BAZ
190		//		"""
191		isTripleQuoted := true
192		prevRemoveLines := map[string]bool{}
193		prevInsertLines := map[string]bool{}
194		var list2 textList
195		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
196		for _, r := range list {
197			if !r.Value.Equal(textEllipsis) {
198				line, _ := strconv.Unquote(string(r.Value.(textLine)))
199				line = strings.TrimPrefix(strings.TrimSuffix(line, "\r"), "\r") // trim leading/trailing carriage returns for legacy Windows endline support
200				normLine := strings.Map(func(r rune) rune {
201					if unicode.IsSpace(r) {
202						return -1 // drop whitespace to avoid visually indistinguishable output
203					}
204					return r
205				}, line)
206				isPrintable := func(r rune) bool {
207					return unicode.IsPrint(r) || r == '\t' // specially treat tab as printable
208				}
209				isTripleQuoted = !strings.HasPrefix(line, `"""`) && !strings.HasPrefix(line, "...") && strings.TrimFunc(line, isPrintable) == ""
210				switch r.Diff {
211				case diffRemoved:
212					isTripleQuoted = isTripleQuoted && !prevInsertLines[normLine]
213					prevRemoveLines[normLine] = true
214				case diffInserted:
215					isTripleQuoted = isTripleQuoted && !prevRemoveLines[normLine]
216					prevInsertLines[normLine] = true
217				}
218				if !isTripleQuoted {
219					break
220				}
221				r.Value = textLine(line)
222				r.ElideComma = true
223			}
224			if !(r.Diff == diffRemoved || r.Diff == diffInserted) { // start a new non-adjacent difference group
225				prevRemoveLines = map[string]bool{}
226				prevInsertLines = map[string]bool{}
227			}
228			list2 = append(list2, r)
229		}
230		if r := list2[len(list2)-1]; r.Diff == diffIdentical && len(r.Value.(textLine)) == 0 {
231			list2 = list2[:len(list2)-1] // elide single empty line at the end
232		}
233		list2 = append(list2, textRecord{Value: textLine(`"""`), ElideComma: true})
234		if isTripleQuoted {
235			var out textNode = &textWrap{Prefix: "(", Value: list2, Suffix: ")"}
236			switch t.Kind() {
237			case reflect.String:
238				if t != stringType {
239					out = opts.FormatType(t, out)
240				}
241			case reflect.Slice:
242				// Always emit type for slices since the triple-quote syntax
243				// looks like a string (not a slice).
244				opts = opts.WithTypeMode(emitType)
245				out = opts.FormatType(t, out)
246			}
247			return out
248		}
249
250	// If the text appears to be single-lined text,
251	// then perform differencing in approximately fixed-sized chunks.
252	// The output is printed as quoted strings.
253	case isMostlyText:
254		list = opts.formatDiffSlice(
255			reflect.ValueOf(sx), reflect.ValueOf(sy), 64, "byte",
256			func(v reflect.Value, d diffMode) textRecord {
257				s := formatString(v.String())
258				return textRecord{Diff: d, Value: textLine(s)}
259			},
260		)
261
262	// If the text appears to be binary data,
263	// then perform differencing in approximately fixed-sized chunks.
264	// The output is inspired by hexdump.
265	case isBinary:
266		list = opts.formatDiffSlice(
267			reflect.ValueOf(sx), reflect.ValueOf(sy), 16, "byte",
268			func(v reflect.Value, d diffMode) textRecord {
269				var ss []string
270				for i := 0; i < v.Len(); i++ {
271					ss = append(ss, formatHex(v.Index(i).Uint()))
272				}
273				s := strings.Join(ss, ", ")
274				comment := commentString(fmt.Sprintf("%c|%v|", d, formatASCII(v.String())))
275				return textRecord{Diff: d, Value: textLine(s), Comment: comment}
276			},
277		)
278
279	// For all other slices of primitive types,
280	// then perform differencing in approximately fixed-sized chunks.
281	// The size of each chunk depends on the width of the element kind.
282	default:
283		var chunkSize int
284		if t.Elem().Kind() == reflect.Bool {
285			chunkSize = 16
286		} else {
287			switch t.Elem().Bits() {
288			case 8:
289				chunkSize = 16
290			case 16:
291				chunkSize = 12
292			case 32:
293				chunkSize = 8
294			default:
295				chunkSize = 8
296			}
297		}
298		list = opts.formatDiffSlice(
299			vx, vy, chunkSize, t.Elem().Kind().String(),
300			func(v reflect.Value, d diffMode) textRecord {
301				var ss []string
302				for i := 0; i < v.Len(); i++ {
303					switch t.Elem().Kind() {
304					case reflect.Int, reflect.Int8, reflect.Int16, reflect.Int32, reflect.Int64:
305						ss = append(ss, fmt.Sprint(v.Index(i).Int()))
306					case reflect.Uint, reflect.Uint16, reflect.Uint32, reflect.Uint64:
307						ss = append(ss, fmt.Sprint(v.Index(i).Uint()))
308					case reflect.Uint8, reflect.Uintptr:
309						ss = append(ss, formatHex(v.Index(i).Uint()))
310					case reflect.Bool, reflect.Float32, reflect.Float64, reflect.Complex64, reflect.Complex128:
311						ss = append(ss, fmt.Sprint(v.Index(i).Interface()))
312					}
313				}
314				s := strings.Join(ss, ", ")
315				return textRecord{Diff: d, Value: textLine(s)}
316			},
317		)
318	}
319
320	// Wrap the output with appropriate type information.
321	var out textNode = &textWrap{Prefix: "{", Value: list, Suffix: "}"}
322	if !isMostlyText {
323		// The "{...}" byte-sequence literal is not valid Go syntax for strings.
324		// Emit the type for extra clarity (e.g. "string{...}").
325		if t.Kind() == reflect.String {
326			opts = opts.WithTypeMode(emitType)
327		}
328		return opts.FormatType(t, out)
329	}
330	switch t.Kind() {
331	case reflect.String:
332		out = &textWrap{Prefix: "strings.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
333		if t != stringType {
334			out = opts.FormatType(t, out)
335		}
336	case reflect.Slice:
337		out = &textWrap{Prefix: "bytes.Join(", Value: out, Suffix: fmt.Sprintf(", %q)", delim)}
338		if t != bytesType {
339			out = opts.FormatType(t, out)
340		}
341	}
342	return out
343}
344
345// formatASCII formats s as an ASCII string.
346// This is useful for printing binary strings in a semi-legible way.
347func formatASCII(s string) string {
348	b := bytes.Repeat([]byte{'.'}, len(s))
349	for i := 0; i < len(s); i++ {
350		if ' ' <= s[i] && s[i] <= '~' {
351			b[i] = s[i]
352		}
353	}
354	return string(b)
355}
356
357func (opts formatOptions) formatDiffSlice(
358	vx, vy reflect.Value, chunkSize int, name string,
359	makeRec func(reflect.Value, diffMode) textRecord,
360) (list textList) {
361	eq := func(ix, iy int) bool {
362		return vx.Index(ix).Interface() == vy.Index(iy).Interface()
363	}
364	es := diff.Difference(vx.Len(), vy.Len(), func(ix, iy int) diff.Result {
365		return diff.BoolResult(eq(ix, iy))
366	})
367
368	appendChunks := func(v reflect.Value, d diffMode) int {
369		n0 := v.Len()
370		for v.Len() > 0 {
371			n := chunkSize
372			if n > v.Len() {
373				n = v.Len()
374			}
375			list = append(list, makeRec(v.Slice(0, n), d))
376			v = v.Slice(n, v.Len())
377		}
378		return n0 - v.Len()
379	}
380
381	var numDiffs int
382	maxLen := -1
383	if opts.LimitVerbosity {
384		maxLen = (1 << opts.verbosity()) << 2 // 4, 8, 16, 32, 64, etc...
385		opts.VerbosityLevel--
386	}
387
388	groups := coalesceAdjacentEdits(name, es)
389	groups = coalesceInterveningIdentical(groups, chunkSize/4)
390	groups = cleanupSurroundingIdentical(groups, eq)
391	maxGroup := diffStats{Name: name}
392	for i, ds := range groups {
393		if maxLen >= 0 && numDiffs >= maxLen {
394			maxGroup = maxGroup.Append(ds)
395			continue
396		}
397
398		// Print equal.
399		if ds.NumDiff() == 0 {
400			// Compute the number of leading and trailing equal bytes to print.
401			var numLo, numHi int
402			numEqual := ds.NumIgnored + ds.NumIdentical
403			for numLo < chunkSize*numContextRecords && numLo+numHi < numEqual && i != 0 {
404				numLo++
405			}
406			for numHi < chunkSize*numContextRecords && numLo+numHi < numEqual && i != len(groups)-1 {
407				numHi++
408			}
409			if numEqual-(numLo+numHi) <= chunkSize && ds.NumIgnored == 0 {
410				numHi = numEqual - numLo // Avoid pointless coalescing of single equal row
411			}
412
413			// Print the equal bytes.
414			appendChunks(vx.Slice(0, numLo), diffIdentical)
415			if numEqual > numLo+numHi {
416				ds.NumIdentical -= numLo + numHi
417				list.AppendEllipsis(ds)
418			}
419			appendChunks(vx.Slice(numEqual-numHi, numEqual), diffIdentical)
420			vx = vx.Slice(numEqual, vx.Len())
421			vy = vy.Slice(numEqual, vy.Len())
422			continue
423		}
424
425		// Print unequal.
426		len0 := len(list)
427		nx := appendChunks(vx.Slice(0, ds.NumIdentical+ds.NumRemoved+ds.NumModified), diffRemoved)
428		vx = vx.Slice(nx, vx.Len())
429		ny := appendChunks(vy.Slice(0, ds.NumIdentical+ds.NumInserted+ds.NumModified), diffInserted)
430		vy = vy.Slice(ny, vy.Len())
431		numDiffs += len(list) - len0
432	}
433	if maxGroup.IsZero() {
434		assert(vx.Len() == 0 && vy.Len() == 0)
435	} else {
436		list.AppendEllipsis(maxGroup)
437	}
438	return list
439}
440
441// coalesceAdjacentEdits coalesces the list of edits into groups of adjacent
442// equal or unequal counts.
443//
444// Example:
445//
446//	Input:  "..XXY...Y"
447//	Output: [
448//		{NumIdentical: 2},
449//		{NumRemoved: 2, NumInserted 1},
450//		{NumIdentical: 3},
451//		{NumInserted: 1},
452//	]
453func coalesceAdjacentEdits(name string, es diff.EditScript) (groups []diffStats) {
454	var prevMode byte
455	lastStats := func(mode byte) *diffStats {
456		if prevMode != mode {
457			groups = append(groups, diffStats{Name: name})
458			prevMode = mode
459		}
460		return &groups[len(groups)-1]
461	}
462	for _, e := range es {
463		switch e {
464		case diff.Identity:
465			lastStats('=').NumIdentical++
466		case diff.UniqueX:
467			lastStats('!').NumRemoved++
468		case diff.UniqueY:
469			lastStats('!').NumInserted++
470		case diff.Modified:
471			lastStats('!').NumModified++
472		}
473	}
474	return groups
475}
476
477// coalesceInterveningIdentical coalesces sufficiently short (<= windowSize)
478// equal groups into adjacent unequal groups that currently result in a
479// dual inserted/removed printout. This acts as a high-pass filter to smooth
480// out high-frequency changes within the windowSize.
481//
482// Example:
483//
484//	WindowSize: 16,
485//	Input: [
486//		{NumIdentical: 61},              // group 0
487//		{NumRemoved: 3, NumInserted: 1}, // group 1
488//		{NumIdentical: 6},               // ├── coalesce
489//		{NumInserted: 2},                // ├── coalesce
490//		{NumIdentical: 1},               // ├── coalesce
491//		{NumRemoved: 9},                 // └── coalesce
492//		{NumIdentical: 64},              // group 2
493//		{NumRemoved: 3, NumInserted: 1}, // group 3
494//		{NumIdentical: 6},               // ├── coalesce
495//		{NumInserted: 2},                // ├── coalesce
496//		{NumIdentical: 1},               // ├── coalesce
497//		{NumRemoved: 7},                 // ├── coalesce
498//		{NumIdentical: 1},               // ├── coalesce
499//		{NumRemoved: 2},                 // └── coalesce
500//		{NumIdentical: 63},              // group 4
501//	]
502//	Output: [
503//		{NumIdentical: 61},
504//		{NumIdentical: 7, NumRemoved: 12, NumInserted: 3},
505//		{NumIdentical: 64},
506//		{NumIdentical: 8, NumRemoved: 12, NumInserted: 3},
507//		{NumIdentical: 63},
508//	]
509func coalesceInterveningIdentical(groups []diffStats, windowSize int) []diffStats {
510	groups, groupsOrig := groups[:0], groups
511	for i, ds := range groupsOrig {
512		if len(groups) >= 2 && ds.NumDiff() > 0 {
513			prev := &groups[len(groups)-2] // Unequal group
514			curr := &groups[len(groups)-1] // Equal group
515			next := &groupsOrig[i]         // Unequal group
516			hadX, hadY := prev.NumRemoved > 0, prev.NumInserted > 0
517			hasX, hasY := next.NumRemoved > 0, next.NumInserted > 0
518			if ((hadX || hasX) && (hadY || hasY)) && curr.NumIdentical <= windowSize {
519				*prev = prev.Append(*curr).Append(*next)
520				groups = groups[:len(groups)-1] // Truncate off equal group
521				continue
522			}
523		}
524		groups = append(groups, ds)
525	}
526	return groups
527}
528
529// cleanupSurroundingIdentical scans through all unequal groups, and
530// moves any leading sequence of equal elements to the preceding equal group and
531// moves and trailing sequence of equal elements to the succeeding equal group.
532//
533// This is necessary since coalesceInterveningIdentical may coalesce edit groups
534// together such that leading/trailing spans of equal elements becomes possible.
535// Note that this can occur even with an optimal diffing algorithm.
536//
537// Example:
538//
539//	Input: [
540//		{NumIdentical: 61},
541//		{NumIdentical: 1 , NumRemoved: 11, NumInserted: 2}, // assume 3 leading identical elements
542//		{NumIdentical: 67},
543//		{NumIdentical: 7, NumRemoved: 12, NumInserted: 3},  // assume 10 trailing identical elements
544//		{NumIdentical: 54},
545//	]
546//	Output: [
547//		{NumIdentical: 64}, // incremented by 3
548//		{NumRemoved: 9},
549//		{NumIdentical: 67},
550//		{NumRemoved: 9},
551//		{NumIdentical: 64}, // incremented by 10
552//	]
553func cleanupSurroundingIdentical(groups []diffStats, eq func(i, j int) bool) []diffStats {
554	var ix, iy int // indexes into sequence x and y
555	for i, ds := range groups {
556		// Handle equal group.
557		if ds.NumDiff() == 0 {
558			ix += ds.NumIdentical
559			iy += ds.NumIdentical
560			continue
561		}
562
563		// Handle unequal group.
564		nx := ds.NumIdentical + ds.NumRemoved + ds.NumModified
565		ny := ds.NumIdentical + ds.NumInserted + ds.NumModified
566		var numLeadingIdentical, numTrailingIdentical int
567		for j := 0; j < nx && j < ny && eq(ix+j, iy+j); j++ {
568			numLeadingIdentical++
569		}
570		for j := 0; j < nx && j < ny && eq(ix+nx-1-j, iy+ny-1-j); j++ {
571			numTrailingIdentical++
572		}
573		if numIdentical := numLeadingIdentical + numTrailingIdentical; numIdentical > 0 {
574			if numLeadingIdentical > 0 {
575				// Remove leading identical span from this group and
576				// insert it into the preceding group.
577				if i-1 >= 0 {
578					groups[i-1].NumIdentical += numLeadingIdentical
579				} else {
580					// No preceding group exists, so prepend a new group,
581					// but do so after we finish iterating over all groups.
582					defer func() {
583						groups = append([]diffStats{{Name: groups[0].Name, NumIdentical: numLeadingIdentical}}, groups...)
584					}()
585				}
586				// Increment indexes since the preceding group would have handled this.
587				ix += numLeadingIdentical
588				iy += numLeadingIdentical
589			}
590			if numTrailingIdentical > 0 {
591				// Remove trailing identical span from this group and
592				// insert it into the succeeding group.
593				if i+1 < len(groups) {
594					groups[i+1].NumIdentical += numTrailingIdentical
595				} else {
596					// No succeeding group exists, so append a new group,
597					// but do so after we finish iterating over all groups.
598					defer func() {
599						groups = append(groups, diffStats{Name: groups[len(groups)-1].Name, NumIdentical: numTrailingIdentical})
600					}()
601				}
602				// Do not increment indexes since the succeeding group will handle this.
603			}
604
605			// Update this group since some identical elements were removed.
606			nx -= numIdentical
607			ny -= numIdentical
608			groups[i] = diffStats{Name: ds.Name, NumRemoved: nx, NumInserted: ny}
609		}
610		ix += nx
611		iy += ny
612	}
613	return groups
614}