decode.go

  1// Copyright 2019 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package protojson
  6
  7import (
  8	"encoding/base64"
  9	"fmt"
 10	"math"
 11	"strconv"
 12	"strings"
 13
 14	"google.golang.org/protobuf/encoding/protowire"
 15	"google.golang.org/protobuf/internal/encoding/json"
 16	"google.golang.org/protobuf/internal/encoding/messageset"
 17	"google.golang.org/protobuf/internal/errors"
 18	"google.golang.org/protobuf/internal/flags"
 19	"google.golang.org/protobuf/internal/genid"
 20	"google.golang.org/protobuf/internal/pragma"
 21	"google.golang.org/protobuf/internal/set"
 22	"google.golang.org/protobuf/proto"
 23	"google.golang.org/protobuf/reflect/protoreflect"
 24	"google.golang.org/protobuf/reflect/protoregistry"
 25)
 26
 27// Unmarshal reads the given []byte into the given [proto.Message].
 28// The provided message must be mutable (e.g., a non-nil pointer to a message).
 29func Unmarshal(b []byte, m proto.Message) error {
 30	return UnmarshalOptions{}.Unmarshal(b, m)
 31}
 32
// UnmarshalOptions is a configurable JSON format parser.
// The zero value is usable: a nil Resolver and a zero RecursionLimit are
// replaced with defaults when unmarshaling starts.
type UnmarshalOptions struct {
	pragma.NoUnkeyedLiterals

	// If AllowPartial is set, input for messages that will result in missing
	// required fields will not return an error; the final initialization
	// check is skipped.
	AllowPartial bool

	// If DiscardUnknown is set, unknown fields are skipped and unknown enum
	// names (given as JSON strings) leave the field unset instead of
	// producing an error.
	DiscardUnknown bool

	// Resolver is used for looking up types when unmarshaling
	// google.protobuf.Any messages or extension fields.
	// If nil, this defaults to using protoregistry.GlobalTypes.
	Resolver interface {
		protoregistry.MessageTypeResolver
		protoregistry.ExtensionTypeResolver
	}

	// RecursionLimit limits how deeply messages may be nested.
	// If zero, protowire.DefaultRecursionLimit is applied.
	// Input nested more deeply than the limit is rejected with an error.
	RecursionLimit int
}
 56
 57// Unmarshal reads the given []byte and populates the given [proto.Message]
 58// using options in the UnmarshalOptions object.
 59// It will clear the message first before setting the fields.
 60// If it returns an error, the given message may be partially set.
 61// The provided message must be mutable (e.g., a non-nil pointer to a message).
 62func (o UnmarshalOptions) Unmarshal(b []byte, m proto.Message) error {
 63	return o.unmarshal(b, m)
 64}
 65
 66// unmarshal is a centralized function that all unmarshal operations go through.
 67// For profiling purposes, avoid changing the name of this function or
 68// introducing other code paths for unmarshal that do not go through this.
 69func (o UnmarshalOptions) unmarshal(b []byte, m proto.Message) error {
 70	proto.Reset(m)
 71
 72	if o.Resolver == nil {
 73		o.Resolver = protoregistry.GlobalTypes
 74	}
 75	if o.RecursionLimit == 0 {
 76		o.RecursionLimit = protowire.DefaultRecursionLimit
 77	}
 78
 79	dec := decoder{json.NewDecoder(b), o}
 80	if err := dec.unmarshalMessage(m.ProtoReflect(), false); err != nil {
 81		return err
 82	}
 83
 84	// Check for EOF.
 85	tok, err := dec.Read()
 86	if err != nil {
 87		return err
 88	}
 89	if tok.Kind() != json.EOF {
 90		return dec.unexpectedTokenError(tok)
 91	}
 92
 93	if o.AllowPartial {
 94		return nil
 95	}
 96	return proto.CheckInitialized(m)
 97}
 98
// decoder pairs the low-level JSON token decoder with the options in effect
// for the current unmarshal operation.
//
// The methods in this file use value receivers, so a change a method makes to
// opts (such as decrementing RecursionLimit) is visible only to that call and
// the calls it makes, while the embedded *json.Decoder pointer is shared.
type decoder struct {
	*json.Decoder
	opts UnmarshalOptions
}
103
104// newError returns an error object with position info.
105func (d decoder) newError(pos int, f string, x ...any) error {
106	line, column := d.Position(pos)
107	head := fmt.Sprintf("(line %d:%d): ", line, column)
108	return errors.New(head+f, x...)
109}
110
111// unexpectedTokenError returns a syntax error for the given unexpected token.
112func (d decoder) unexpectedTokenError(tok json.Token) error {
113	return d.syntaxError(tok.Pos(), "unexpected token %s", tok.RawString())
114}
115
116// syntaxError returns a syntax error for given position.
117func (d decoder) syntaxError(pos int, f string, x ...any) error {
118	line, column := d.Position(pos)
119	head := fmt.Sprintf("syntax error (line %d:%d): ", line, column)
120	return errors.New(head+f, x...)
121}
122
// unmarshalMessage unmarshals a message into the given protoreflect.Message.
//
// Every call first decrements the recursion budget held in d.opts and fails
// once it drops below zero. Because d is received by value, the decrement
// applies to this call and the calls nested inside it only.
//
// If wellKnownTypeUnmarshaler returns a non-nil function for the message's
// full name, that function handles the whole message. Otherwise a JSON object
// is read member by member: each name is resolved to a field (or extension)
// descriptor and its value is dispatched to the list, map or singular
// unmarshaler.
//
// If skipTypeURL is true, a member named "@type" is read and dropped instead
// of being treated as a field.
//
// An error is returned for malformed input, for an unknown field when
// DiscardUnknown is not set, for a field that appears more than once, and for
// a oneof that has more than one member present.
func (d decoder) unmarshalMessage(m protoreflect.Message, skipTypeURL bool) error {
	d.opts.RecursionLimit--
	if d.opts.RecursionLimit < 0 {
		return errors.New("exceeded max recursion depth")
	}
	if unmarshal := wellKnownTypeUnmarshaler(m.Descriptor().FullName()); unmarshal != nil {
		return unmarshal(d, m)
	}

	tok, err := d.Read()
	if err != nil {
		return err
	}
	if tok.Kind() != json.ObjectOpen {
		return d.unexpectedTokenError(tok)
	}

	messageDesc := m.Descriptor()
	if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
		return errors.New("no support for proto1 MessageSets")
	}

	// seenNums records the field numbers already encountered and seenOneofs
	// the indexes of oneofs that already had a member present; both are used
	// to reject repeated input.
	var seenNums set.Ints
	var seenOneofs set.Ints
	fieldDescs := messageDesc.Fields()
	for {
		// Read field name.
		tok, err := d.Read()
		if err != nil {
			return err
		}
		switch tok.Kind() {
		default:
			return d.unexpectedTokenError(tok)
		case json.ObjectClose:
			// End of the JSON object: the message is complete.
			return nil
		case json.Name:
			// Continue below.
		}

		name := tok.Name()
		// Unmarshaling a non-custom embedded message in Any will contain the
		// JSON field "@type" which should be skipped because it is not a field
		// of the embedded message, but simply an artifact of the Any format.
		if skipTypeURL && name == "@type" {
			// Consume the value token; the token and any error from this
			// Read are discarded here.
			d.Read()
			continue
		}

		// Get the FieldDescriptor.
		var fd protoreflect.FieldDescriptor
		if strings.HasPrefix(name, "[") && strings.HasSuffix(name, "]") {
			// Only extension names are in [name] format.
			extName := protoreflect.FullName(name[1 : len(name)-1])
			extType, err := d.opts.Resolver.FindExtensionByName(extName)
			// A NotFound error is tolerated: fd stays nil (presumably extType
			// is nil in that case) and the name is handled as an unknown
			// field below.
			if err != nil && err != protoregistry.NotFound {
				return d.newError(tok.Pos(), "unable to resolve %s: %v", tok.RawString(), err)
			}
			if extType != nil {
				fd = extType.TypeDescriptor()
				// The extension must apply to this message: its number has to
				// fall in one of the message's extension ranges and its
				// containing message has to be this message.
				if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
					return d.newError(tok.Pos(), "message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
				}
			}
		} else {
			// The name can either be the JSON name or the proto field name.
			fd = fieldDescs.ByJSONName(name)
			if fd == nil {
				fd = fieldDescs.ByTextName(name)
			}
		}

		if fd == nil {
			// Field is unknown.
			if d.opts.DiscardUnknown {
				if err := d.skipJSONValue(); err != nil {
					return err
				}
				continue
			}
			return d.newError(tok.Pos(), "unknown field %v", tok.RawString())
		}

		// Do not allow duplicate fields.
		// The number is recorded before the null check below, so a field
		// whose value is JSON null still counts as seen.
		num := uint64(fd.Number())
		if seenNums.Has(num) {
			return d.newError(tok.Pos(), "duplicate field %v", tok.RawString())
		}
		seenNums.Set(num)

		// No need to set values for JSON null unless the field type is
		// google.protobuf.Value or google.protobuf.NullValue.
		// NOTE(review): the error from Peek is ignored here; presumably the
		// same error resurfaces from the Read made by the branch below —
		// confirm against json.Decoder.
		if tok, _ := d.Peek(); tok.Kind() == json.Null && !isKnownValue(fd) && !isNullValue(fd) {
			d.Read()
			continue
		}

		switch {
		case fd.IsList():
			list := m.Mutable(fd).List()
			if err := d.unmarshalList(list, fd); err != nil {
				return err
			}
		case fd.IsMap():
			mmap := m.Mutable(fd).Map()
			if err := d.unmarshalMap(mmap, fd); err != nil {
				return err
			}
		default:
			// If field is a oneof, check if it has already been set.
			if od := fd.ContainingOneof(); od != nil {
				idx := uint64(od.Index())
				if seenOneofs.Has(idx) {
					return d.newError(tok.Pos(), "error parsing %s, oneof %v is already set", tok.RawString(), od.FullName())
				}
				seenOneofs.Set(idx)
			}

			// Required or optional fields.
			if err := d.unmarshalSingular(m, fd); err != nil {
				return err
			}
		}
	}
}
249
250func isKnownValue(fd protoreflect.FieldDescriptor) bool {
251	md := fd.Message()
252	return md != nil && md.FullName() == genid.Value_message_fullname
253}
254
255func isNullValue(fd protoreflect.FieldDescriptor) bool {
256	ed := fd.Enum()
257	return ed != nil && ed.FullName() == genid.NullValue_enum_fullname
258}
259
260// unmarshalSingular unmarshals to the non-repeated field specified
261// by the given FieldDescriptor.
262func (d decoder) unmarshalSingular(m protoreflect.Message, fd protoreflect.FieldDescriptor) error {
263	var val protoreflect.Value
264	var err error
265	switch fd.Kind() {
266	case protoreflect.MessageKind, protoreflect.GroupKind:
267		val = m.NewField(fd)
268		err = d.unmarshalMessage(val.Message(), false)
269	default:
270		val, err = d.unmarshalScalar(fd)
271	}
272
273	if err != nil {
274		return err
275	}
276	if val.IsValid() {
277		m.Set(fd, val)
278	}
279	return nil
280}
281
282// unmarshalScalar unmarshals to a scalar/enum protoreflect.Value specified by
283// the given FieldDescriptor.
284func (d decoder) unmarshalScalar(fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
285	const b32 int = 32
286	const b64 int = 64
287
288	tok, err := d.Read()
289	if err != nil {
290		return protoreflect.Value{}, err
291	}
292
293	kind := fd.Kind()
294	switch kind {
295	case protoreflect.BoolKind:
296		if tok.Kind() == json.Bool {
297			return protoreflect.ValueOfBool(tok.Bool()), nil
298		}
299
300	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
301		if v, ok := unmarshalInt(tok, b32); ok {
302			return v, nil
303		}
304
305	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
306		if v, ok := unmarshalInt(tok, b64); ok {
307			return v, nil
308		}
309
310	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
311		if v, ok := unmarshalUint(tok, b32); ok {
312			return v, nil
313		}
314
315	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
316		if v, ok := unmarshalUint(tok, b64); ok {
317			return v, nil
318		}
319
320	case protoreflect.FloatKind:
321		if v, ok := unmarshalFloat(tok, b32); ok {
322			return v, nil
323		}
324
325	case protoreflect.DoubleKind:
326		if v, ok := unmarshalFloat(tok, b64); ok {
327			return v, nil
328		}
329
330	case protoreflect.StringKind:
331		if tok.Kind() == json.String {
332			return protoreflect.ValueOfString(tok.ParsedString()), nil
333		}
334
335	case protoreflect.BytesKind:
336		if v, ok := unmarshalBytes(tok); ok {
337			return v, nil
338		}
339
340	case protoreflect.EnumKind:
341		if v, ok := unmarshalEnum(tok, fd, d.opts.DiscardUnknown); ok {
342			return v, nil
343		}
344
345	default:
346		panic(fmt.Sprintf("unmarshalScalar: invalid scalar kind %v", kind))
347	}
348
349	return protoreflect.Value{}, d.newError(tok.Pos(), "invalid value for %v field %v: %v", kind, fd.JSONName(), tok.RawString())
350}
351
352func unmarshalInt(tok json.Token, bitSize int) (protoreflect.Value, bool) {
353	switch tok.Kind() {
354	case json.Number:
355		return getInt(tok, bitSize)
356
357	case json.String:
358		// Decode number from string.
359		s := strings.TrimSpace(tok.ParsedString())
360		if len(s) != len(tok.ParsedString()) {
361			return protoreflect.Value{}, false
362		}
363		dec := json.NewDecoder([]byte(s))
364		tok, err := dec.Read()
365		if err != nil {
366			return protoreflect.Value{}, false
367		}
368		return getInt(tok, bitSize)
369	}
370	return protoreflect.Value{}, false
371}
372
373func getInt(tok json.Token, bitSize int) (protoreflect.Value, bool) {
374	n, ok := tok.Int(bitSize)
375	if !ok {
376		return protoreflect.Value{}, false
377	}
378	if bitSize == 32 {
379		return protoreflect.ValueOfInt32(int32(n)), true
380	}
381	return protoreflect.ValueOfInt64(n), true
382}
383
384func unmarshalUint(tok json.Token, bitSize int) (protoreflect.Value, bool) {
385	switch tok.Kind() {
386	case json.Number:
387		return getUint(tok, bitSize)
388
389	case json.String:
390		// Decode number from string.
391		s := strings.TrimSpace(tok.ParsedString())
392		if len(s) != len(tok.ParsedString()) {
393			return protoreflect.Value{}, false
394		}
395		dec := json.NewDecoder([]byte(s))
396		tok, err := dec.Read()
397		if err != nil {
398			return protoreflect.Value{}, false
399		}
400		return getUint(tok, bitSize)
401	}
402	return protoreflect.Value{}, false
403}
404
405func getUint(tok json.Token, bitSize int) (protoreflect.Value, bool) {
406	n, ok := tok.Uint(bitSize)
407	if !ok {
408		return protoreflect.Value{}, false
409	}
410	if bitSize == 32 {
411		return protoreflect.ValueOfUint32(uint32(n)), true
412	}
413	return protoreflect.ValueOfUint64(n), true
414}
415
416func unmarshalFloat(tok json.Token, bitSize int) (protoreflect.Value, bool) {
417	switch tok.Kind() {
418	case json.Number:
419		return getFloat(tok, bitSize)
420
421	case json.String:
422		s := tok.ParsedString()
423		switch s {
424		case "NaN":
425			if bitSize == 32 {
426				return protoreflect.ValueOfFloat32(float32(math.NaN())), true
427			}
428			return protoreflect.ValueOfFloat64(math.NaN()), true
429		case "Infinity":
430			if bitSize == 32 {
431				return protoreflect.ValueOfFloat32(float32(math.Inf(+1))), true
432			}
433			return protoreflect.ValueOfFloat64(math.Inf(+1)), true
434		case "-Infinity":
435			if bitSize == 32 {
436				return protoreflect.ValueOfFloat32(float32(math.Inf(-1))), true
437			}
438			return protoreflect.ValueOfFloat64(math.Inf(-1)), true
439		}
440
441		// Decode number from string.
442		if len(s) != len(strings.TrimSpace(s)) {
443			return protoreflect.Value{}, false
444		}
445		dec := json.NewDecoder([]byte(s))
446		tok, err := dec.Read()
447		if err != nil {
448			return protoreflect.Value{}, false
449		}
450		return getFloat(tok, bitSize)
451	}
452	return protoreflect.Value{}, false
453}
454
455func getFloat(tok json.Token, bitSize int) (protoreflect.Value, bool) {
456	n, ok := tok.Float(bitSize)
457	if !ok {
458		return protoreflect.Value{}, false
459	}
460	if bitSize == 32 {
461		return protoreflect.ValueOfFloat32(float32(n)), true
462	}
463	return protoreflect.ValueOfFloat64(n), true
464}
465
466func unmarshalBytes(tok json.Token) (protoreflect.Value, bool) {
467	if tok.Kind() != json.String {
468		return protoreflect.Value{}, false
469	}
470
471	s := tok.ParsedString()
472	enc := base64.StdEncoding
473	if strings.ContainsAny(s, "-_") {
474		enc = base64.URLEncoding
475	}
476	if len(s)%4 != 0 {
477		enc = enc.WithPadding(base64.NoPadding)
478	}
479	b, err := enc.DecodeString(s)
480	if err != nil {
481		return protoreflect.Value{}, false
482	}
483	return protoreflect.ValueOfBytes(b), true
484}
485
486func unmarshalEnum(tok json.Token, fd protoreflect.FieldDescriptor, discardUnknown bool) (protoreflect.Value, bool) {
487	switch tok.Kind() {
488	case json.String:
489		// Lookup EnumNumber based on name.
490		s := tok.ParsedString()
491		if enumVal := fd.Enum().Values().ByName(protoreflect.Name(s)); enumVal != nil {
492			return protoreflect.ValueOfEnum(enumVal.Number()), true
493		}
494		if discardUnknown {
495			return protoreflect.Value{}, true
496		}
497
498	case json.Number:
499		if n, ok := tok.Int(32); ok {
500			return protoreflect.ValueOfEnum(protoreflect.EnumNumber(n)), true
501		}
502
503	case json.Null:
504		// This is only valid for google.protobuf.NullValue.
505		if isNullValue(fd) {
506			return protoreflect.ValueOfEnum(0), true
507		}
508	}
509
510	return protoreflect.Value{}, false
511}
512
513func (d decoder) unmarshalList(list protoreflect.List, fd protoreflect.FieldDescriptor) error {
514	tok, err := d.Read()
515	if err != nil {
516		return err
517	}
518	if tok.Kind() != json.ArrayOpen {
519		return d.unexpectedTokenError(tok)
520	}
521
522	switch fd.Kind() {
523	case protoreflect.MessageKind, protoreflect.GroupKind:
524		for {
525			tok, err := d.Peek()
526			if err != nil {
527				return err
528			}
529
530			if tok.Kind() == json.ArrayClose {
531				d.Read()
532				return nil
533			}
534
535			val := list.NewElement()
536			if err := d.unmarshalMessage(val.Message(), false); err != nil {
537				return err
538			}
539			list.Append(val)
540		}
541	default:
542		for {
543			tok, err := d.Peek()
544			if err != nil {
545				return err
546			}
547
548			if tok.Kind() == json.ArrayClose {
549				d.Read()
550				return nil
551			}
552
553			val, err := d.unmarshalScalar(fd)
554			if err != nil {
555				return err
556			}
557			if val.IsValid() {
558				list.Append(val)
559			}
560		}
561	}
562
563	return nil
564}
565
566func (d decoder) unmarshalMap(mmap protoreflect.Map, fd protoreflect.FieldDescriptor) error {
567	tok, err := d.Read()
568	if err != nil {
569		return err
570	}
571	if tok.Kind() != json.ObjectOpen {
572		return d.unexpectedTokenError(tok)
573	}
574
575	// Determine ahead whether map entry is a scalar type or a message type in
576	// order to call the appropriate unmarshalMapValue func inside the for loop
577	// below.
578	var unmarshalMapValue func() (protoreflect.Value, error)
579	switch fd.MapValue().Kind() {
580	case protoreflect.MessageKind, protoreflect.GroupKind:
581		unmarshalMapValue = func() (protoreflect.Value, error) {
582			val := mmap.NewValue()
583			if err := d.unmarshalMessage(val.Message(), false); err != nil {
584				return protoreflect.Value{}, err
585			}
586			return val, nil
587		}
588	default:
589		unmarshalMapValue = func() (protoreflect.Value, error) {
590			return d.unmarshalScalar(fd.MapValue())
591		}
592	}
593
594Loop:
595	for {
596		// Read field name.
597		tok, err := d.Read()
598		if err != nil {
599			return err
600		}
601		switch tok.Kind() {
602		default:
603			return d.unexpectedTokenError(tok)
604		case json.ObjectClose:
605			break Loop
606		case json.Name:
607			// Continue.
608		}
609
610		// Unmarshal field name.
611		pkey, err := d.unmarshalMapKey(tok, fd.MapKey())
612		if err != nil {
613			return err
614		}
615
616		// Check for duplicate field name.
617		if mmap.Has(pkey) {
618			return d.newError(tok.Pos(), "duplicate map key %v", tok.RawString())
619		}
620
621		// Read and unmarshal field value.
622		pval, err := unmarshalMapValue()
623		if err != nil {
624			return err
625		}
626		if pval.IsValid() {
627			mmap.Set(pkey, pval)
628		}
629	}
630
631	return nil
632}
633
634// unmarshalMapKey converts given token of Name kind into a protoreflect.MapKey.
635// A map key type is any integral or string type.
636func (d decoder) unmarshalMapKey(tok json.Token, fd protoreflect.FieldDescriptor) (protoreflect.MapKey, error) {
637	const b32 = 32
638	const b64 = 64
639	const base10 = 10
640
641	name := tok.Name()
642	kind := fd.Kind()
643	switch kind {
644	case protoreflect.StringKind:
645		return protoreflect.ValueOfString(name).MapKey(), nil
646
647	case protoreflect.BoolKind:
648		switch name {
649		case "true":
650			return protoreflect.ValueOfBool(true).MapKey(), nil
651		case "false":
652			return protoreflect.ValueOfBool(false).MapKey(), nil
653		}
654
655	case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
656		if n, err := strconv.ParseInt(name, base10, b32); err == nil {
657			return protoreflect.ValueOfInt32(int32(n)).MapKey(), nil
658		}
659
660	case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
661		if n, err := strconv.ParseInt(name, base10, b64); err == nil {
662			return protoreflect.ValueOfInt64(int64(n)).MapKey(), nil
663		}
664
665	case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
666		if n, err := strconv.ParseUint(name, base10, b32); err == nil {
667			return protoreflect.ValueOfUint32(uint32(n)).MapKey(), nil
668		}
669
670	case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
671		if n, err := strconv.ParseUint(name, base10, b64); err == nil {
672			return protoreflect.ValueOfUint64(uint64(n)).MapKey(), nil
673		}
674
675	default:
676		panic(fmt.Sprintf("invalid kind for map key: %v", kind))
677	}
678
679	return protoreflect.MapKey{}, d.newError(tok.Pos(), "invalid value for %v key: %s", kind, tok.RawString())
680}