suggestionList.go

 1package validator
 2
 3import (
 4	"sort"
 5	"strings"
 6
 7	"github.com/agnivade/levenshtein"
 8)
 9
10// Given an invalid input string and a list of valid options, returns a filtered
11// list of valid options sorted based on their similarity with the input.
12func SuggestionList(input string, options []string) []string {
13	var results []string
14	optionsByDistance := map[string]int{}
15
16	for _, option := range options {
17		distance := lexicalDistance(input, option)
18		threshold := calcThreshold(input, option)
19		if distance <= threshold {
20			results = append(results, option)
21			optionsByDistance[option] = distance
22		}
23	}
24
25	sort.Slice(results, func(i, j int) bool {
26		return optionsByDistance[results[i]] < optionsByDistance[results[j]]
27	})
28	return results
29}
30
// calcThreshold returns the largest edit distance at which b is still treated
// as a plausible suggestion for a: half the length of the longer of the two
// strings (rounded down), but never less than 1.
//
// Lengths are counted in runes, not bytes, so that multi-byte UTF-8
// characters do not inflate the threshold relative to a per-character edit
// distance.
func calcThreshold(a, b string) int {
	// len([]rune(s)) yields the number of code points in s; invalid UTF-8
	// bytes each count as one rune.
	longest := len([]rune(a))
	if n := len([]rune(b)); n > longest {
		longest = n
	}

	threshold := longest / 2
	if threshold < 1 {
		// Always tolerate at least one edit, even for very short strings.
		threshold = 1
	}
	return threshold
}
42
43// Computes the lexical distance between strings A and B.
44//
45// The "distance" between two strings is given by counting the minimum number
46// of edits needed to transform string A into string B. An edit can be an
47// insertion, deletion, or substitution of a single character, or a swap of two
48// adjacent characters.
49//
50// Includes a custom alteration from Damerau-Levenshtein to treat case changes
51// as a single edit which helps identify mis-cased values with an edit distance
52// of 1.
53//
54// This distance can be useful for detecting typos in input or sorting
55func lexicalDistance(a, b string) int {
56	if a == b {
57		return 0
58	}
59
60	a = strings.ToLower(a)
61	b = strings.ToLower(b)
62
63	// Any case change counts as a single edit
64	if a == b {
65		return 1
66	}
67
68	return levenshtein.ComputeDistance(a, b)
69}