1package validator
2
3import (
4 "sort"
5 "strings"
6
7 "github.com/agnivade/levenshtein"
8)
9
10// Given an invalid input string and a list of valid options, returns a filtered
11// list of valid options sorted based on their similarity with the input.
12func SuggestionList(input string, options []string) []string {
13 var results []string
14 optionsByDistance := map[string]int{}
15
16 for _, option := range options {
17 distance := lexicalDistance(input, option)
18 threshold := calcThreshold(input, option)
19 if distance <= threshold {
20 results = append(results, option)
21 optionsByDistance[option] = distance
22 }
23 }
24
25 sort.Slice(results, func(i, j int) bool {
26 return optionsByDistance[results[i]] < optionsByDistance[results[j]]
27 })
28 return results
29}
30
// calcThreshold returns the largest edit distance at which b is still
// considered a plausible suggestion for a: half the length of the longer
// string (rounded down), but never less than 1.
//
// Lengths are measured in runes rather than bytes so that the threshold is
// expressed in the same unit as the rune-based edit distance it is compared
// against; counting bytes would inflate the threshold for multi-byte input
// and let unrelated options through. For ASCII strings the result is the same
// as a byte-length computation.
func calcThreshold(a, b string) (threshold int) {
	// Ranging over a string yields one iteration per rune (invalid bytes
	// count as one rune each), so this counts runes without extra imports.
	runesA, runesB := 0, 0
	for range a {
		runesA++
	}
	for range b {
		runesB++
	}

	longest := runesA
	if runesB > longest {
		longest = runesB
	}

	threshold = longest / 2
	if threshold < 1 {
		// Always tolerate at least a single edit, even for very short names.
		threshold = 1
	}
	return threshold
}
42
43// Computes the lexical distance between strings A and B.
44//
45// The "distance" between two strings is given by counting the minimum number
46// of edits needed to transform string A into string B. An edit can be an
47// insertion, deletion, or substitution of a single character, or a swap of two
48// adjacent characters.
49//
50// Includes a custom alteration from Damerau-Levenshtein to treat case changes
51// as a single edit which helps identify mis-cased values with an edit distance
52// of 1.
53//
54// This distance can be useful for detecting typos in input or sorting
55func lexicalDistance(a, b string) int {
56 if a == b {
57 return 0
58 }
59
60 a = strings.ToLower(a)
61 b = strings.ToLower(b)
62
63 // Any case change counts as a single edit
64 if a == b {
65 return 1
66 }
67
68 return levenshtein.ComputeDistance(a, b)
69}