encode.go

  1// Copyright 2019 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package proto
  6
  7import (
  8	"errors"
  9	"fmt"
 10
 11	"google.golang.org/protobuf/encoding/protowire"
 12	"google.golang.org/protobuf/internal/encoding/messageset"
 13	"google.golang.org/protobuf/internal/order"
 14	"google.golang.org/protobuf/internal/pragma"
 15	"google.golang.org/protobuf/reflect/protoreflect"
 16	"google.golang.org/protobuf/runtime/protoiface"
 17
 18	protoerrors "google.golang.org/protobuf/internal/errors"
 19)
 20
// MarshalOptions configures the marshaler.
//
// Example usage:
//
//	b, err := MarshalOptions{Deterministic: true}.Marshal(m)
type MarshalOptions struct {
	pragma.NoUnkeyedLiterals

	// AllowPartial allows messages that have missing required fields to marshal
	// without returning an error. If AllowPartial is false (the default),
	// Marshal will return an error if there are any missing required fields.
	AllowPartial bool

	// Deterministic controls whether the same message will always be
	// serialized to the same bytes within the same binary.
	//
	// Setting this option guarantees that repeated serialization of
	// the same message will return the same bytes, and that different
	// processes of the same binary (which may be executing on different
	// machines) will serialize equal messages to the same bytes.
	// It has no effect on the resulting size of the encoded message compared
	// to a non-deterministic marshal.
	//
	// Note that the deterministic serialization is NOT canonical across
	// languages. It is not guaranteed to remain stable over time. It is
	// unstable across different builds with schema changes due to unknown
	// fields. Users who need canonical serialization (e.g., persistent
	// storage in a canonical form, fingerprinting, etc.) must define
	// their own canonicalization specification and implement their own
	// serializer rather than relying on this API.
	//
	// If deterministic serialization is requested, map entries will be
	// sorted by keys in lexicographical order. This is an implementation
	// detail and subject to change.
	Deterministic bool

	// UseCachedSize indicates that the result of a previous Size call
	// may be reused.
	//
	// Setting this option asserts that:
	//
	// 1. Size has previously been called on this message with identical
	// options (except for UseCachedSize itself).
	//
	// 2. The message and all its submessages have not changed in any
	// way since the Size call. For lazily decoded messages, accessing
	// a message results in decoding the message, which is a change.
	//
	// If either of these invariants is violated,
	// the results are undefined and may include panics or corrupted output.
	//
	// Implementations MAY take this option into account to provide
	// better performance, but there is no guarantee that they will do so.
	// There is absolutely no guarantee that Size followed by Marshal with
	// UseCachedSize set will perform equivalently to Marshal alone.
	UseCachedSize bool
}
 78
 79// flags turns the specified MarshalOptions (user-facing) into
 80// protoiface.MarshalInputFlags (used internally by the marshaler).
 81//
 82// See impl.marshalOptions.Options for the inverse operation.
 83func (o MarshalOptions) flags() protoiface.MarshalInputFlags {
 84	var flags protoiface.MarshalInputFlags
 85
 86	// Note: o.AllowPartial is always forced to true by MarshalOptions.marshal,
 87	// which is why it is not a part of MarshalInputFlags.
 88
 89	if o.Deterministic {
 90		flags |= protoiface.MarshalDeterministic
 91	}
 92
 93	if o.UseCachedSize {
 94		flags |= protoiface.MarshalUseCachedSize
 95	}
 96
 97	return flags
 98}
 99
100// Marshal returns the wire-format encoding of m.
101//
102// This is the most common entry point for encoding a Protobuf message.
103//
104// See the [MarshalOptions] type if you need more control.
105func Marshal(m Message) ([]byte, error) {
106	// Treat nil message interface as an empty message; nothing to output.
107	if m == nil {
108		return nil, nil
109	}
110
111	out, err := MarshalOptions{}.marshal(nil, m.ProtoReflect())
112	if len(out.Buf) == 0 && err == nil {
113		out.Buf = emptyBytesForMessage(m)
114	}
115	return out.Buf, err
116}
117
118// Marshal returns the wire-format encoding of m.
119func (o MarshalOptions) Marshal(m Message) ([]byte, error) {
120	// Treat nil message interface as an empty message; nothing to output.
121	if m == nil {
122		return nil, nil
123	}
124
125	out, err := o.marshal(nil, m.ProtoReflect())
126	if len(out.Buf) == 0 && err == nil {
127		out.Buf = emptyBytesForMessage(m)
128	}
129	return out.Buf, err
130}
131
132// emptyBytesForMessage returns a nil buffer if and only if m is invalid,
133// otherwise it returns a non-nil empty buffer.
134//
135// This is to assist the edge-case where user-code does the following:
136//
137//	m1.OptionalBytes, _ = proto.Marshal(m2)
138//
139// where they expect the proto2 "optional_bytes" field to be populated
140// if any only if m2 is a valid message.
141func emptyBytesForMessage(m Message) []byte {
142	if m == nil || !m.ProtoReflect().IsValid() {
143		return nil
144	}
145	return emptyBuf[:]
146}
147
148// MarshalAppend appends the wire-format encoding of m to b,
149// returning the result.
150//
151// This is a less common entry point than [Marshal], which is only needed if you
152// need to supply your own buffers for performance reasons.
153func (o MarshalOptions) MarshalAppend(b []byte, m Message) ([]byte, error) {
154	// Treat nil message interface as an empty message; nothing to append.
155	if m == nil {
156		return b, nil
157	}
158
159	out, err := o.marshal(b, m.ProtoReflect())
160	return out.Buf, err
161}
162
163// MarshalState returns the wire-format encoding of a message.
164//
165// This method permits fine-grained control over the marshaler.
166// Most users should use [Marshal] instead.
167func (o MarshalOptions) MarshalState(in protoiface.MarshalInput) (protoiface.MarshalOutput, error) {
168	return o.marshal(in.Buf, in.Message)
169}
170
171// marshal is a centralized function that all marshal operations go through.
172// For profiling purposes, avoid changing the name of this function or
173// introducing other code paths for marshal that do not go through this.
174func (o MarshalOptions) marshal(b []byte, m protoreflect.Message) (out protoiface.MarshalOutput, err error) {
175	allowPartial := o.AllowPartial
176	o.AllowPartial = true
177	if methods := protoMethods(m); methods != nil && methods.Marshal != nil &&
178		!(o.Deterministic && methods.Flags&protoiface.SupportMarshalDeterministic == 0) {
179		in := protoiface.MarshalInput{
180			Message: m,
181			Buf:     b,
182			Flags:   o.flags(),
183		}
184		if methods.Size != nil {
185			sout := methods.Size(protoiface.SizeInput{
186				Message: m,
187				Flags:   in.Flags,
188			})
189			if cap(b) < len(b)+sout.Size {
190				in.Buf = make([]byte, len(b), growcap(cap(b), len(b)+sout.Size))
191				copy(in.Buf, b)
192			}
193			in.Flags |= protoiface.MarshalUseCachedSize
194		}
195		out, err = methods.Marshal(in)
196	} else {
197		out.Buf, err = o.marshalMessageSlow(b, m)
198	}
199	if err != nil {
200		var mismatch *protoerrors.SizeMismatchError
201		if errors.As(err, &mismatch) {
202			return out, fmt.Errorf("marshaling %s: %v", string(m.Descriptor().FullName()), err)
203		}
204		return out, err
205	}
206	if allowPartial {
207		return out, nil
208	}
209	return out, checkInitialized(m)
210}
211
212func (o MarshalOptions) marshalMessage(b []byte, m protoreflect.Message) ([]byte, error) {
213	out, err := o.marshal(b, m)
214	return out.Buf, err
215}
216
// growcap scales up the capacity of a slice.
//
// Given a slice whose current capacity is oldcap and a desired capacity of
// wantcap, growcap returns a new capacity >= wantcap:
//
//   - if wantcap is more than double oldcap, wantcap itself;
//   - otherwise, for small slices (oldcap < 1024), double oldcap;
//   - otherwise, oldcap grown by 25% steps until it reaches wantcap.
//
// The algorithm is mostly identical to the one used by append as of Go 1.14.
func growcap(oldcap, wantcap int) (newcap int) {
	switch {
	case wantcap > oldcap*2:
		return wantcap
	case oldcap < 1024:
		// The Go 1.14 runtime takes this case when len(s) < 1024,
		// not when cap(s) < 1024. The difference doesn't seem
		// significant here.
		return oldcap * 2
	}

	grown := oldcap
	for grown > 0 && grown < wantcap {
		grown += grown / 4
	}
	if grown <= 0 {
		// The stepped growth went non-positive (overflow); use the request.
		return wantcap
	}
	return grown
}
242
243func (o MarshalOptions) marshalMessageSlow(b []byte, m protoreflect.Message) ([]byte, error) {
244	if messageset.IsMessageSet(m.Descriptor()) {
245		return o.marshalMessageSet(b, m)
246	}
247	fieldOrder := order.AnyFieldOrder
248	if o.Deterministic {
249		// TODO: This should use a more natural ordering like NumberFieldOrder,
250		// but doing so breaks golden tests that make invalid assumption about
251		// output stability of this implementation.
252		fieldOrder = order.LegacyFieldOrder
253	}
254	var err error
255	order.RangeFields(m, fieldOrder, func(fd protoreflect.FieldDescriptor, v protoreflect.Value) bool {
256		b, err = o.marshalField(b, fd, v)
257		return err == nil
258	})
259	if err != nil {
260		return b, err
261	}
262	b = append(b, m.GetUnknown()...)
263	return b, nil
264}
265
266func (o MarshalOptions) marshalField(b []byte, fd protoreflect.FieldDescriptor, value protoreflect.Value) ([]byte, error) {
267	switch {
268	case fd.IsList():
269		return o.marshalList(b, fd, value.List())
270	case fd.IsMap():
271		return o.marshalMap(b, fd, value.Map())
272	default:
273		b = protowire.AppendTag(b, fd.Number(), wireTypes[fd.Kind()])
274		return o.marshalSingular(b, fd, value)
275	}
276}
277
278func (o MarshalOptions) marshalList(b []byte, fd protoreflect.FieldDescriptor, list protoreflect.List) ([]byte, error) {
279	if fd.IsPacked() && list.Len() > 0 {
280		b = protowire.AppendTag(b, fd.Number(), protowire.BytesType)
281		b, pos := appendSpeculativeLength(b)
282		for i, llen := 0, list.Len(); i < llen; i++ {
283			var err error
284			b, err = o.marshalSingular(b, fd, list.Get(i))
285			if err != nil {
286				return b, err
287			}
288		}
289		b = finishSpeculativeLength(b, pos)
290		return b, nil
291	}
292
293	kind := fd.Kind()
294	for i, llen := 0, list.Len(); i < llen; i++ {
295		var err error
296		b = protowire.AppendTag(b, fd.Number(), wireTypes[kind])
297		b, err = o.marshalSingular(b, fd, list.Get(i))
298		if err != nil {
299			return b, err
300		}
301	}
302	return b, nil
303}
304
305func (o MarshalOptions) marshalMap(b []byte, fd protoreflect.FieldDescriptor, mapv protoreflect.Map) ([]byte, error) {
306	keyf := fd.MapKey()
307	valf := fd.MapValue()
308	keyOrder := order.AnyKeyOrder
309	if o.Deterministic {
310		keyOrder = order.GenericKeyOrder
311	}
312	var err error
313	order.RangeEntries(mapv, keyOrder, func(key protoreflect.MapKey, value protoreflect.Value) bool {
314		b = protowire.AppendTag(b, fd.Number(), protowire.BytesType)
315		var pos int
316		b, pos = appendSpeculativeLength(b)
317
318		b, err = o.marshalField(b, keyf, key.Value())
319		if err != nil {
320			return false
321		}
322		b, err = o.marshalField(b, valf, value)
323		if err != nil {
324			return false
325		}
326		b = finishSpeculativeLength(b, pos)
327		return true
328	})
329	return b, err
330}
331
// speculativeLength is the number of bytes reserved up front for the length
// prefix of a length-delimited field.
//
// When encoding length-prefixed fields, we speculatively set aside this many
// bytes for the length, encode the data, and then encode the length
// (shifting the data if necessary to make room).
const speculativeLength = 1

// appendSpeculativeLength reserves speculativeLength zero bytes at the end
// of b as a placeholder for a length prefix. It returns the extended buffer
// and the offset at which the placeholder starts.
func appendSpeculativeLength(b []byte) ([]byte, int) {
	start := len(b)
	var placeholder [4]byte
	return append(b, placeholder[:speculativeLength]...), start
}
342
343func finishSpeculativeLength(b []byte, pos int) []byte {
344	mlen := len(b) - pos - speculativeLength
345	msiz := protowire.SizeVarint(uint64(mlen))
346	if msiz != speculativeLength {
347		for i := 0; i < msiz-speculativeLength; i++ {
348			b = append(b, 0)
349		}
350		copy(b[pos+msiz:], b[pos+speculativeLength:])
351		b = b[:pos+msiz+mlen]
352	}
353	protowire.AppendVarint(b[:pos], uint64(mlen))
354	return b
355}