ident.go

// Package ident provides functions for parsing and converting identifier names
// between various naming conventions. It has support for MixedCaps, lowerCamelCase,
// and SCREAMING_SNAKE_CASE naming conventions.
  4package ident
  5
  6import (
  7	"strings"
  8	"unicode"
  9	"unicode/utf8"
 10)
 11
 12// ParseMixedCaps parses a MixedCaps identifier name.
 13//
 14// E.g., "ClientMutationID" -> {"Client", "Mutation", "ID"}.
 15func ParseMixedCaps(name string) Name {
 16	var words Name
 17
 18	// Split name at any lower -> Upper or Upper -> Upper,lower transitions.
 19	// Check each word for initialisms.
 20	runes := []rune(name)
 21	w, i := 0, 0 // Index of start of word, scan.
 22	for i+1 <= len(runes) {
 23		eow := false // Whether we hit the end of a word.
 24		if i+1 == len(runes) {
 25			eow = true
 26		} else if unicode.IsLower(runes[i]) && unicode.IsUpper(runes[i+1]) {
 27			// lower -> Upper.
 28			eow = true
 29		} else if i+2 < len(runes) && unicode.IsUpper(runes[i]) && unicode.IsUpper(runes[i+1]) && unicode.IsLower(runes[i+2]) {
 30			// Upper -> Upper,lower. End of acronym, followed by a word.
 31			eow = true
 32
 33			if string(runes[i:i+3]) == "IDs" { // Special case, plural form of ID initialism.
 34				eow = false
 35			}
 36		}
 37		i++
 38		if !eow {
 39			continue
 40		}
 41
 42		// [w, i) is a word.
 43		word := string(runes[w:i])
 44		if initialism, ok := isInitialism(word); ok {
 45			words = append(words, initialism)
 46		} else if i1, i2, ok := isTwoInitialisms(word); ok {
 47			words = append(words, i1, i2)
 48		} else {
 49			words = append(words, word)
 50		}
 51		w = i
 52	}
 53	return words
 54}
 55
 56// ParseLowerCamelCase parses a lowerCamelCase identifier name.
 57//
 58// E.g., "clientMutationId" -> {"client", "Mutation", "Id"}.
 59func ParseLowerCamelCase(name string) Name {
 60	var words Name
 61
 62	// Split name at any Upper letters.
 63	runes := []rune(name)
 64	w, i := 0, 0 // Index of start of word, scan.
 65	for i+1 <= len(runes) {
 66		eow := false // Whether we hit the end of a word.
 67		if i+1 == len(runes) {
 68			eow = true
 69		} else if unicode.IsUpper(runes[i+1]) {
 70			// Upper letter.
 71			eow = true
 72		}
 73		i++
 74		if !eow {
 75			continue
 76		}
 77
 78		// [w, i) is a word.
 79		words = append(words, string(runes[w:i]))
 80		w = i
 81	}
 82	return words
 83}
 84
 85// ParseScreamingSnakeCase parses a SCREAMING_SNAKE_CASE identifier name.
 86//
 87// E.g., "CLIENT_MUTATION_ID" -> {"CLIENT", "MUTATION", "ID"}.
 88func ParseScreamingSnakeCase(name string) Name {
 89	var words Name
 90
 91	// Split name at '_' characters.
 92	runes := []rune(name)
 93	w, i := 0, 0 // Index of start of word, scan.
 94	for i+1 <= len(runes) {
 95		eow := false // Whether we hit the end of a word.
 96		if i+1 == len(runes) {
 97			eow = true
 98		} else if runes[i+1] == '_' {
 99			// Underscore.
100			eow = true
101		}
102		i++
103		if !eow {
104			continue
105		}
106
107		// [w, i) is a word.
108		words = append(words, string(runes[w:i]))
109		if i < len(runes) && runes[i] == '_' {
110			// Skip underscore.
111			i++
112		}
113		w = i
114	}
115	return words
116}
117
// Name is an identifier name, broken up into individual words.
//
// The Parse* functions in this package return a Name, and its To* methods
// join the words back together under a particular naming convention.
type Name []string
120
121// ToMixedCaps expresses identifer name in MixedCaps naming convention.
122//
123// E.g., "ClientMutationID".
124func (n Name) ToMixedCaps() string {
125	for i, word := range n {
126		if strings.EqualFold(word, "IDs") { // Special case, plural form of ID initialism.
127			n[i] = "IDs"
128			continue
129		}
130		if initialism, ok := isInitialism(word); ok {
131			n[i] = initialism
132			continue
133		}
134		if brand, ok := isBrand(word); ok {
135			n[i] = brand
136			continue
137		}
138		r, size := utf8.DecodeRuneInString(word)
139		n[i] = string(unicode.ToUpper(r)) + strings.ToLower(word[size:])
140	}
141	return strings.Join(n, "")
142}
143
144// ToLowerCamelCase expresses identifer name in lowerCamelCase naming convention.
145//
146// E.g., "clientMutationId".
147func (n Name) ToLowerCamelCase() string {
148	for i, word := range n {
149		if i == 0 {
150			n[i] = strings.ToLower(word)
151			continue
152		}
153		r, size := utf8.DecodeRuneInString(word)
154		n[i] = string(unicode.ToUpper(r)) + strings.ToLower(word[size:])
155	}
156	return strings.Join(n, "")
157}
158
159// isInitialism reports whether word is an initialism.
160func isInitialism(word string) (string, bool) {
161	initialism := strings.ToUpper(word)
162	_, ok := initialisms[initialism]
163	return initialism, ok
164}
165
166// isTwoInitialisms reports whether word is two initialisms.
167func isTwoInitialisms(word string) (string, string, bool) {
168	word = strings.ToUpper(word)
169	for i := 2; i <= len(word)-2; i++ { // Shortest initialism is 2 characters long.
170		_, ok1 := initialisms[word[:i]]
171		_, ok2 := initialisms[word[i:]]
172		if ok1 && ok2 {
173			return word[:i], word[i:], true
174		}
175	}
176	return "", "", false
177}
178
// initialisms is the set of words that the MixedCaps naming convention writes
// entirely in upper case. An entry belongs here only when it is highly
// unlikely to be anything other than an initialism: "ID" is fine (Freudian
// code is rare), but "AND" is not.
var initialisms = func() map[string]struct{} {
	words := []string{
		// These are the common initialisms from golint. Keep them in sync
		// with https://gotools.org/github.com/golang/lint#commonInitialisms.
		"ACL", "API", "ASCII",
		"CPU", "CSS",
		"DNS",
		"EOF",
		"GUID",
		"HTML", "HTTP", "HTTPS",
		"ID", "IP",
		"JSON",
		"LHS",
		"QPS",
		"RAM", "RHS", "RPC",
		"SLA", "SMTP", "SQL", "SSH",
		"TCP", "TLS", "TTL",
		"UDP", "UI", "UID", "UUID", "URI", "URL", "UTF8",
		"VM",
		"XML", "XMPP", "XSRF", "XSS",

		// Additional common initialisms.
		"RSS",
	}

	set := make(map[string]struct{}, len(words))
	for _, w := range words {
		set[w] = struct{}{}
	}
	return set
}()
227
228// isBrand reports whether word is a brand.
229func isBrand(word string) (string, bool) {
230	brand, ok := brands[strings.ToLower(word)]
231	return brand, ok
232}
233
// brands maps the lower-case form of each brand name to its canonical spelling
// in the MixedCaps naming convention;
// see https://dmitri.shuralyov.com/idiomatic-go#for-brands-or-words-with-more-than-1-capital-letter-lowercase-all-letters.
// Only add entries that are highly unlikely to be non-brands.
var brands = func() map[string]string {
	// Canonical spellings; the lookup key is derived from each of them.
	canonical := []string{
		"GitHub",
	}

	byLower := make(map[string]string, len(canonical))
	for _, brand := range canonical {
		byLower[strings.ToLower(brand)] = brand
	}
	return byLower
}()