levenshtein.go

 1// Package levenshtein is a Go implementation to calculate Levenshtein Distance.
 2//
 3// Implementation taken from
 4// https://gist.github.com/andrei-m/982927#gistcomment-1931258
 5package levenshtein
 6
 7// ComputeDistance computes the levenshtein distance between the two
 8// strings passed as an argument. The return value is the levenshtein distance
 9//
10// Works on runes (Unicode code points) but does not normalize
11// the input strings. See https://blog.golang.org/normalization
12// and the golang.org/x/text/unicode/norm pacage.
13func ComputeDistance(a, b string) int {
14	if a == b {
15		return 0
16	}
17
18	// We need to convert to []rune if the strings are non-ascii.
19	// This could be avoided by using utf8.RuneCountInString
20	// and then doing some juggling with rune indices.
21	// The primary challenge is keeping track of the previous rune.
22	// With a range loop, its not that easy. And with a for-loop
23	// we need to keep track of the inter-rune width using utf8.DecodeRuneInString
24	s1 := []rune(a)
25	s2 := []rune(b)
26
27	// swap to save some memory O(min(a,b)) instead of O(a)
28	if len(s1) > len(s2) {
29		s1, s2 = s2, s1
30	}
31	lenS1 := len(s1)
32	lenS2 := len(s2)
33
34	// init the row
35	x := make([]int, lenS1+1)
36	for i := 0; i <= lenS1; i++ {
37		x[i] = i
38	}
39
40	// fill in the rest
41	for i := 1; i <= lenS2; i++ {
42		prev := i
43		var current int
44
45		for j := 1; j <= lenS1; j++ {
46
47			if s2[i-1] == s1[j-1] {
48				current = x[j-1] // match
49			} else {
50				current = min(x[j-1]+1, prev+1, x[j]+1)
51			}
52			x[j-1] = prev
53			prev = current
54		}
55		x[lenS1] = prev
56	}
57	return x[lenS1]
58}
59
60func min(a, b, c int) int {
61	if a < b {
62		if a < c {
63			return a
64		}
65	} else {
66		if b < c {
67			return b
68		}
69	}
70	return c
71}