encode.go

  1// Copyright 2018 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package text
  6
  7import (
  8	"math"
  9	"math/bits"
 10	"strconv"
 11	"strings"
 12	"unicode/utf8"
 13
 14	"google.golang.org/protobuf/internal/detrand"
 15	"google.golang.org/protobuf/internal/errors"
 16)
 17
 18// encType represents an encoding type.
 19type encType uint8
 20
 21const (
 22	_ encType = (1 << iota) / 2
 23	name
 24	scalar
 25	messageOpen
 26	messageClose
 27)
 28
// Encoder provides methods to write out textproto constructs and values. The user is
// responsible for producing valid sequences of constructs and values.
//
// The embedded encoderState holds everything that changes while writing; the
// remaining fields are options set by NewEncoder.
type Encoder struct {
	encoderState

	indent      string  // indentation added per nesting level; empty selects single-line output
	delims      [2]byte // message delimiters: [0] opens a message, [1] closes it
	outputASCII bool    // passed to appendString to request escaping of non-ASCII runes
}
 38
// encoderState is the part of an Encoder that changes as output is written.
// Snapshot returns a copy of it and Reset installs one, each in a single
// struct assignment.
type encoderState struct {
	lastType encType // kind of the construct written most recently; updated by prepareNext
	indents  []byte  // indentation for the current nesting depth in multi-line mode
	out      []byte  // output accumulated so far
}
 44
 45// NewEncoder returns an Encoder.
 46//
 47// If indent is a non-empty string, it causes every entry in a List or Message
 48// to be preceded by the indent and trailed by a newline.
 49//
 50// If delims is not the zero value, it controls the delimiter characters used
 51// for messages (e.g., "{}" vs "<>").
 52//
 53// If outputASCII is true, strings will be serialized in such a way that
 54// multi-byte UTF-8 sequences are escaped. This property ensures that the
 55// overall output is ASCII (as opposed to UTF-8).
 56func NewEncoder(buf []byte, indent string, delims [2]byte, outputASCII bool) (*Encoder, error) {
 57	e := &Encoder{
 58		encoderState: encoderState{out: buf},
 59	}
 60	if len(indent) > 0 {
 61		if strings.Trim(indent, " \t") != "" {
 62			return nil, errors.New("indent may only be composed of space and tab characters")
 63		}
 64		e.indent = indent
 65	}
 66	switch delims {
 67	case [2]byte{0, 0}:
 68		e.delims = [2]byte{'{', '}'}
 69	case [2]byte{'{', '}'}, [2]byte{'<', '>'}:
 70		e.delims = delims
 71	default:
 72		return nil, errors.New("delimiters may only be \"{}\" or \"<>\"")
 73	}
 74	e.outputASCII = outputASCII
 75
 76	return e, nil
 77}
 78
 79// Bytes returns the content of the written bytes.
 80func (e *Encoder) Bytes() []byte {
 81	return e.out
 82}
 83
 84// StartMessage writes out the '{' or '<' symbol.
 85func (e *Encoder) StartMessage() {
 86	e.prepareNext(messageOpen)
 87	e.out = append(e.out, e.delims[0])
 88}
 89
 90// EndMessage writes out the '}' or '>' symbol.
 91func (e *Encoder) EndMessage() {
 92	e.prepareNext(messageClose)
 93	e.out = append(e.out, e.delims[1])
 94}
 95
 96// WriteName writes out the field name and the separator ':'.
 97func (e *Encoder) WriteName(s string) {
 98	e.prepareNext(name)
 99	e.out = append(e.out, s...)
100	e.out = append(e.out, ':')
101}
102
103// WriteBool writes out the given boolean value.
104func (e *Encoder) WriteBool(b bool) {
105	if b {
106		e.WriteLiteral("true")
107	} else {
108		e.WriteLiteral("false")
109	}
110}
111
112// WriteString writes out the given string value.
113func (e *Encoder) WriteString(s string) {
114	e.prepareNext(scalar)
115	e.out = appendString(e.out, s, e.outputASCII)
116}
117
118func appendString(out []byte, in string, outputASCII bool) []byte {
119	out = append(out, '"')
120	i := indexNeedEscapeInString(in)
121	in, out = in[i:], append(out, in[:i]...)
122	for len(in) > 0 {
123		switch r, n := utf8.DecodeRuneInString(in); {
124		case r == utf8.RuneError && n == 1:
125			// We do not report invalid UTF-8 because strings in the text format
126			// are used to represent both the proto string and bytes type.
127			r = rune(in[0])
128			fallthrough
129		case r < ' ' || r == '"' || r == '\\' || r == 0x7f:
130			out = append(out, '\\')
131			switch r {
132			case '"', '\\':
133				out = append(out, byte(r))
134			case '\n':
135				out = append(out, 'n')
136			case '\r':
137				out = append(out, 'r')
138			case '\t':
139				out = append(out, 't')
140			default:
141				out = append(out, 'x')
142				out = append(out, "00"[1+(bits.Len32(uint32(r))-1)/4:]...)
143				out = strconv.AppendUint(out, uint64(r), 16)
144			}
145			in = in[n:]
146		case r >= utf8.RuneSelf && (outputASCII || r <= 0x009f):
147			out = append(out, '\\')
148			if r <= math.MaxUint16 {
149				out = append(out, 'u')
150				out = append(out, "0000"[1+(bits.Len32(uint32(r))-1)/4:]...)
151				out = strconv.AppendUint(out, uint64(r), 16)
152			} else {
153				out = append(out, 'U')
154				out = append(out, "00000000"[1+(bits.Len32(uint32(r))-1)/4:]...)
155				out = strconv.AppendUint(out, uint64(r), 16)
156			}
157			in = in[n:]
158		default:
159			i := indexNeedEscapeInString(in[n:])
160			in, out = in[n+i:], append(out, in[:n+i]...)
161		}
162	}
163	out = append(out, '"')
164	return out
165}
166
// indexNeedEscapeInString reports the position of the first byte in s that
// cannot be copied through without a closer look: anything below ' ', either
// quote character, a backslash, or any byte at or above 0x7f. When s has no
// such byte the result is len(s).
func indexNeedEscapeInString(s string) int {
	for pos := 0; pos < len(s); pos++ {
		switch c := s[pos]; {
		case c < ' ', c >= 0x7f, c == '"', c == '\'', c == '\\':
			return pos
		}
	}
	return len(s)
}
177
178// WriteFloat writes out the given float value for given bitSize.
179func (e *Encoder) WriteFloat(n float64, bitSize int) {
180	e.prepareNext(scalar)
181	e.out = appendFloat(e.out, n, bitSize)
182}
183
// appendFloat appends the text form of n to out and returns the extended
// slice. NaN is written as "nan" and the infinities as "inf" and "-inf".
// Every other value uses strconv's 'g' format with the smallest number of
// digits that represents it exactly for the given bitSize.
func appendFloat(out []byte, n float64, bitSize int) []byte {
	if math.IsNaN(n) {
		return append(out, "nan"...)
	}
	if math.IsInf(n, 0) {
		if n < 0 {
			return append(out, "-inf"...)
		}
		return append(out, "inf"...)
	}
	return strconv.AppendFloat(out, n, 'g', -1, bitSize)
}
196
197// WriteInt writes out the given signed integer value.
198func (e *Encoder) WriteInt(n int64) {
199	e.prepareNext(scalar)
200	e.out = strconv.AppendInt(e.out, n, 10)
201}
202
203// WriteUint writes out the given unsigned integer value.
204func (e *Encoder) WriteUint(n uint64) {
205	e.prepareNext(scalar)
206	e.out = strconv.AppendUint(e.out, n, 10)
207}
208
// WriteLiteral writes out the given string as a literal value without quotes.
// This is used for writing enum literal strings.
//
// The string is copied to the output exactly as given; no escaping or
// validation is applied. WriteBool also goes through this method.
func (e *Encoder) WriteLiteral(s string) {
	// A literal counts as a scalar when deciding on separating whitespace.
	e.prepareNext(scalar)
	e.out = append(e.out, s...)
}
215
216// prepareNext adds possible space and indentation for the next value based
217// on last encType and indent option. It also updates e.lastType to next.
218func (e *Encoder) prepareNext(next encType) {
219	defer func() {
220		e.lastType = next
221	}()
222
223	// Single line.
224	if len(e.indent) == 0 {
225		// Add space after each field before the next one.
226		if e.lastType&(scalar|messageClose) != 0 && next == name {
227			e.out = append(e.out, ' ')
228			// Add a random extra space to make output unstable.
229			if detrand.Bool() {
230				e.out = append(e.out, ' ')
231			}
232		}
233		return
234	}
235
236	// Multi-line.
237	switch {
238	case e.lastType == name:
239		e.out = append(e.out, ' ')
240		// Add a random extra space after name: to make output unstable.
241		if detrand.Bool() {
242			e.out = append(e.out, ' ')
243		}
244
245	case e.lastType == messageOpen && next != messageClose:
246		e.indents = append(e.indents, e.indent...)
247		e.out = append(e.out, '\n')
248		e.out = append(e.out, e.indents...)
249
250	case e.lastType&(scalar|messageClose) != 0:
251		if next == messageClose {
252			e.indents = e.indents[:len(e.indents)-len(e.indent)]
253		}
254		e.out = append(e.out, '\n')
255		e.out = append(e.out, e.indents...)
256	}
257}
258
// Snapshot returns the current snapshot for use in Reset.
//
// The result is a copy of the embedded encoderState struct. Its slice fields
// still refer to the encoder's underlying arrays; the bytes themselves are
// not copied.
func (e *Encoder) Snapshot() encoderState {
	return e.encoderState
}
263
// Reset resets the Encoder to the given encoderState from a Snapshot.
//
// Only the embedded state (last construct type, indentation and output) is
// replaced; the indent, delims and outputASCII options are left untouched.
func (e *Encoder) Reset(es encoderState) {
	e.encoderState = es
}
268
269// AppendString appends the escaped form of the input string to b.
270func AppendString(b []byte, s string) []byte {
271	return appendString(b, s, false)
272}