// Package ident provides functions for parsing and converting identifier names
// between various naming conventions. It has support for MixedCaps, lowerCamelCase,
// and SCREAMING_SNAKE_CASE naming conventions.
4package ident
5
6import (
7 "strings"
8 "unicode"
9 "unicode/utf8"
10)
11
12// ParseMixedCaps parses a MixedCaps identifier name.
13//
14// E.g., "ClientMutationID" -> {"Client", "Mutation", "ID"}.
15func ParseMixedCaps(name string) Name {
16 var words Name
17
18 // Split name at any lower -> Upper or Upper -> Upper,lower transitions.
19 // Check each word for initialisms.
20 runes := []rune(name)
21 w, i := 0, 0 // Index of start of word, scan.
22 for i+1 <= len(runes) {
23 eow := false // Whether we hit the end of a word.
24 if i+1 == len(runes) {
25 eow = true
26 } else if unicode.IsLower(runes[i]) && unicode.IsUpper(runes[i+1]) {
27 // lower -> Upper.
28 eow = true
29 } else if i+2 < len(runes) && unicode.IsUpper(runes[i]) && unicode.IsUpper(runes[i+1]) && unicode.IsLower(runes[i+2]) {
30 // Upper -> Upper,lower. End of acronym, followed by a word.
31 eow = true
32
33 if string(runes[i:i+3]) == "IDs" { // Special case, plural form of ID initialism.
34 eow = false
35 }
36 }
37 i++
38 if !eow {
39 continue
40 }
41
42 // [w, i) is a word.
43 word := string(runes[w:i])
44 if initialism, ok := isInitialism(word); ok {
45 words = append(words, initialism)
46 } else if i1, i2, ok := isTwoInitialisms(word); ok {
47 words = append(words, i1, i2)
48 } else {
49 words = append(words, word)
50 }
51 w = i
52 }
53 return words
54}
55
56// ParseLowerCamelCase parses a lowerCamelCase identifier name.
57//
58// E.g., "clientMutationId" -> {"client", "Mutation", "Id"}.
59func ParseLowerCamelCase(name string) Name {
60 var words Name
61
62 // Split name at any Upper letters.
63 runes := []rune(name)
64 w, i := 0, 0 // Index of start of word, scan.
65 for i+1 <= len(runes) {
66 eow := false // Whether we hit the end of a word.
67 if i+1 == len(runes) {
68 eow = true
69 } else if unicode.IsUpper(runes[i+1]) {
70 // Upper letter.
71 eow = true
72 }
73 i++
74 if !eow {
75 continue
76 }
77
78 // [w, i) is a word.
79 words = append(words, string(runes[w:i]))
80 w = i
81 }
82 return words
83}
84
85// ParseScreamingSnakeCase parses a SCREAMING_SNAKE_CASE identifier name.
86//
87// E.g., "CLIENT_MUTATION_ID" -> {"CLIENT", "MUTATION", "ID"}.
88func ParseScreamingSnakeCase(name string) Name {
89 var words Name
90
91 // Split name at '_' characters.
92 runes := []rune(name)
93 w, i := 0, 0 // Index of start of word, scan.
94 for i+1 <= len(runes) {
95 eow := false // Whether we hit the end of a word.
96 if i+1 == len(runes) {
97 eow = true
98 } else if runes[i+1] == '_' {
99 // Underscore.
100 eow = true
101 }
102 i++
103 if !eow {
104 continue
105 }
106
107 // [w, i) is a word.
108 words = append(words, string(runes[w:i]))
109 if i < len(runes) && runes[i] == '_' {
110 // Skip underscore.
111 i++
112 }
113 w = i
114 }
115 return words
116}
117
// Name is an identifier name, broken up into individual words.
//
// The Parse functions in this package return a Name, and the methods on
// Name join its words back together in a particular naming convention.
type Name []string
120
121// ToMixedCaps expresses identifer name in MixedCaps naming convention.
122//
123// E.g., "ClientMutationID".
124func (n Name) ToMixedCaps() string {
125 for i, word := range n {
126 if strings.EqualFold(word, "IDs") { // Special case, plural form of ID initialism.
127 n[i] = "IDs"
128 continue
129 }
130 if initialism, ok := isInitialism(word); ok {
131 n[i] = initialism
132 continue
133 }
134 if brand, ok := isBrand(word); ok {
135 n[i] = brand
136 continue
137 }
138 r, size := utf8.DecodeRuneInString(word)
139 n[i] = string(unicode.ToUpper(r)) + strings.ToLower(word[size:])
140 }
141 return strings.Join(n, "")
142}
143
144// ToLowerCamelCase expresses identifer name in lowerCamelCase naming convention.
145//
146// E.g., "clientMutationId".
147func (n Name) ToLowerCamelCase() string {
148 for i, word := range n {
149 if i == 0 {
150 n[i] = strings.ToLower(word)
151 continue
152 }
153 r, size := utf8.DecodeRuneInString(word)
154 n[i] = string(unicode.ToUpper(r)) + strings.ToLower(word[size:])
155 }
156 return strings.Join(n, "")
157}
158
159// isInitialism reports whether word is an initialism.
160func isInitialism(word string) (string, bool) {
161 initialism := strings.ToUpper(word)
162 _, ok := initialisms[initialism]
163 return initialism, ok
164}
165
166// isTwoInitialisms reports whether word is two initialisms.
167func isTwoInitialisms(word string) (string, string, bool) {
168 word = strings.ToUpper(word)
169 for i := 2; i <= len(word)-2; i++ { // Shortest initialism is 2 characters long.
170 _, ok1 := initialisms[word[:i]]
171 _, ok2 := initialisms[word[i:]]
172 if ok1 && ok2 {
173 return word[:i], word[i:], true
174 }
175 }
176 return "", "", false
177}
178
// initialisms is the set of initialisms in the MixedCaps naming convention.
// Only add entries that are highly unlikely to be non-initialisms.
// For instance, "ID" is fine (Freudian code is rare), but "AND" is not.
//
// The map is used as a set, so the values are empty structs. Keys must be
// entirely upper case: isInitialism and isTwoInitialisms convert a word to
// upper case before looking it up, so a key containing a lower case letter
// can never match.
var initialisms = map[string]struct{}{
	// These are the common initialisms from golint. Keep them in sync
	// with https://gotools.org/github.com/golang/lint#commonInitialisms.
	"ACL":   {},
	"API":   {},
	"ASCII": {},
	"CPU":   {},
	"CSS":   {},
	"DNS":   {},
	"EOF":   {},
	"GUID":  {},
	"HTML":  {},
	"HTTP":  {},
	"HTTPS": {},
	"ID":    {},
	"IP":    {},
	"JSON":  {},
	"LHS":   {},
	"QPS":   {},
	"RAM":   {},
	"RHS":   {},
	"RPC":   {},
	"SLA":   {},
	"SMTP":  {},
	"SQL":   {},
	"SSH":   {},
	"TCP":   {},
	"TLS":   {},
	"TTL":   {},
	"UDP":   {},
	"UI":    {},
	"UID":   {},
	"UUID":  {},
	"URI":   {},
	"URL":   {},
	"UTF8":  {},
	"VM":    {},
	"XML":   {},
	"XMPP":  {},
	"XSRF":  {},
	"XSS":   {},

	// Additional common initialisms.
	"RSS": {},
}
227
228// isBrand reports whether word is a brand.
229func isBrand(word string) (string, bool) {
230 brand, ok := brands[strings.ToLower(word)]
231 return brand, ok
232}
233
// brands is the map of brands in the MixedCaps naming convention;
// see https://dmitri.shuralyov.com/idiomatic-go#for-brands-or-words-with-more-than-1-capital-letter-lowercase-all-letters.
// Key is the lower case version of the brand, value is the canonical brand spelling.
// Only add entries that are highly unlikely to be non-brands.
//
// Keys must be entirely lower case: isBrand converts a word to lower case
// before looking it up, so a key containing an upper case letter can never
// match.
var brands = map[string]string{
	"github": "GitHub",
}