strings.go

  1// Copyright 2019 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package strs provides string manipulation functionality specific to protobuf.
  6package strs
  7
  8import (
  9	"go/token"
 10	"strings"
 11	"unicode"
 12	"unicode/utf8"
 13
 14	"google.golang.org/protobuf/internal/flags"
 15	"google.golang.org/protobuf/reflect/protoreflect"
 16)
 17
 18// EnforceUTF8 reports whether to enforce strict UTF-8 validation.
 19func EnforceUTF8(fd protoreflect.FieldDescriptor) bool {
 20	if flags.ProtoLegacy || fd.Syntax() == protoreflect.Editions {
 21		if fd, ok := fd.(interface{ EnforceUTF8() bool }); ok {
 22			return fd.EnforceUTF8()
 23		}
 24	}
 25	return fd.Syntax() == protoreflect.Proto3
 26}
 27
 28// GoCamelCase camel-cases a protobuf name for use as a Go identifier.
 29//
 30// If there is an interior underscore followed by a lower case letter,
 31// drop the underscore and convert the letter to upper case.
 32func GoCamelCase(s string) string {
 33	// Invariant: if the next letter is lower case, it must be converted
 34	// to upper case.
 35	// That is, we process a word at a time, where words are marked by _ or
 36	// upper case letter. Digits are treated as words.
 37	var b []byte
 38	for i := 0; i < len(s); i++ {
 39		c := s[i]
 40		switch {
 41		case c == '.' && i+1 < len(s) && isASCIILower(s[i+1]):
 42			// Skip over '.' in ".{{lowercase}}".
 43		case c == '.':
 44			b = append(b, '_') // convert '.' to '_'
 45		case c == '_' && (i == 0 || s[i-1] == '.'):
 46			// Convert initial '_' to ensure we start with a capital letter.
 47			// Do the same for '_' after '.' to match historic behavior.
 48			b = append(b, 'X') // convert '_' to 'X'
 49		case c == '_' && i+1 < len(s) && isASCIILower(s[i+1]):
 50			// Skip over '_' in "_{{lowercase}}".
 51		case isASCIIDigit(c):
 52			b = append(b, c)
 53		default:
 54			// Assume we have a letter now - if not, it's a bogus identifier.
 55			// The next word is a sequence of characters that must start upper case.
 56			if isASCIILower(c) {
 57				c -= 'a' - 'A' // convert lowercase to uppercase
 58			}
 59			b = append(b, c)
 60
 61			// Accept lower case sequence that follows.
 62			for ; i+1 < len(s) && isASCIILower(s[i+1]); i++ {
 63				b = append(b, s[i+1])
 64			}
 65		}
 66	}
 67	return string(b)
 68}
 69
 70// GoSanitized converts a string to a valid Go identifier.
 71func GoSanitized(s string) string {
 72	// Sanitize the input to the set of valid characters,
 73	// which must be '_' or be in the Unicode L or N categories.
 74	s = strings.Map(func(r rune) rune {
 75		if unicode.IsLetter(r) || unicode.IsDigit(r) {
 76			return r
 77		}
 78		return '_'
 79	}, s)
 80
 81	// Prepend '_' in the event of a Go keyword conflict or if
 82	// the identifier is invalid (does not start in the Unicode L category).
 83	r, _ := utf8.DecodeRuneInString(s)
 84	if token.Lookup(s).IsKeyword() || !unicode.IsLetter(r) {
 85		return "_" + s
 86	}
 87	return s
 88}
 89
 90// JSONCamelCase converts a snake_case identifier to a camelCase identifier,
 91// according to the protobuf JSON specification.
 92func JSONCamelCase(s string) string {
 93	var b []byte
 94	var wasUnderscore bool
 95	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
 96		c := s[i]
 97		if c != '_' {
 98			if wasUnderscore && isASCIILower(c) {
 99				c -= 'a' - 'A' // convert to uppercase
100			}
101			b = append(b, c)
102		}
103		wasUnderscore = c == '_'
104	}
105	return string(b)
106}
107
108// JSONSnakeCase converts a camelCase identifier to a snake_case identifier,
109// according to the protobuf JSON specification.
110func JSONSnakeCase(s string) string {
111	var b []byte
112	for i := 0; i < len(s); i++ { // proto identifiers are always ASCII
113		c := s[i]
114		if isASCIIUpper(c) {
115			b = append(b, '_')
116			c += 'a' - 'A' // convert to lowercase
117		}
118		b = append(b, c)
119	}
120	return string(b)
121}
122
// MapEntryName derives the name of the map entry message given the field name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:254-276,6057
//
// Underscores are dropped, the first character and every character that
// follows an underscore are upper-cased, all other characters are kept
// unchanged, and the suffix "Entry" is appended.
//
// The input is walked rune by rune and each rune is written back in full,
// so a non-ASCII character is never truncated to a single byte. Bytes that
// are not valid UTF-8 come out as U+FFFD.
func MapEntryName(s string) string {
	const suffix = "Entry"

	var sb strings.Builder
	sb.Grow(len(s) + len(suffix))
	upperNext := true // the first emitted character is always upper-cased
	for _, r := range s {
		switch {
		case r == '_':
			upperNext = true
		case upperNext:
			sb.WriteRune(unicode.ToUpper(r))
			upperNext = false
		default:
			sb.WriteRune(r)
		}
	}
	sb.WriteString(suffix)
	return sb.String()
}
142
// EnumValueName derives the camel-cased enum value name.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:297-313
//
// Underscores are dropped. The first character and every character that
// follows an underscore are upper-cased; every other character is
// lower-cased.
//
// The input is walked rune by rune and each rune is written back in full,
// so a non-ASCII character is never truncated to a single byte. Bytes that
// are not valid UTF-8 come out as U+FFFD.
func EnumValueName(s string) string {
	var sb strings.Builder
	sb.Grow(len(s))
	upperNext := true // the first emitted character is always upper-cased
	for _, r := range s {
		if r == '_' {
			upperNext = true
			continue
		}
		if upperNext {
			r = unicode.ToUpper(r)
		} else {
			r = unicode.ToLower(r)
		}
		sb.WriteRune(r)
		upperNext = false
	}
	return sb.String()
}
162
// TrimEnumPrefix trims the enum name prefix from an enum value name,
// where the prefix is all lowercase without underscores.
// See protoc v3.8.0: src/google/protobuf/descriptor.cc:330-375
//
// Underscores in s are ignored while matching, and ASCII letters in s are
// compared case-insensitively against prefix. Bytes outside the ASCII
// range must match exactly: they are never case-folded, because a single
// byte of a multi-byte UTF-8 sequence is not a character of its own.
//
// The original s is returned unchanged when the prefix does not match or
// when nothing but underscores would remain after trimming. Otherwise the
// remainder is returned with its leading underscores removed.
func TrimEnumPrefix(s, prefix string) string {
	rest := s
	for len(rest) > 0 && len(prefix) > 0 {
		c := rest[0]
		rest = rest[1:]
		if c == '_' {
			continue // underscores in the value name never consume prefix
		}
		if 'A' <= c && c <= 'Z' {
			c += 'a' - 'A' // ASCII-only lowercase, matching the all-lowercase prefix
		}
		if c != prefix[0] {
			return s // no prefix match
		}
		prefix = prefix[1:]
	}
	if len(prefix) > 0 {
		return s // value name ended before the prefix did
	}
	rest = strings.TrimLeft(rest, "_")
	if rest == "" {
		return s // avoid returning empty string
	}
	return rest
}
187
// isASCIILower reports whether c is an ASCII lower case letter ('a'-'z').
func isASCIILower(c byte) bool {
	return c >= 'a' && c <= 'z'
}
// isASCIIUpper reports whether c is an ASCII upper case letter ('A'-'Z').
func isASCIIUpper(c byte) bool {
	return c >= 'A' && c <= 'Z'
}
// isASCIIDigit reports whether c is an ASCII decimal digit ('0'-'9').
func isASCIIDigit(c byte) bool {
	return c >= '0' && c <= '9'
}