1// Copyright 2019 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package protojson
6
7import (
8 "encoding/base64"
9 "fmt"
10 "math"
11 "strconv"
12 "strings"
13
14 "google.golang.org/protobuf/encoding/protowire"
15 "google.golang.org/protobuf/internal/encoding/json"
16 "google.golang.org/protobuf/internal/encoding/messageset"
17 "google.golang.org/protobuf/internal/errors"
18 "google.golang.org/protobuf/internal/flags"
19 "google.golang.org/protobuf/internal/genid"
20 "google.golang.org/protobuf/internal/pragma"
21 "google.golang.org/protobuf/internal/set"
22 "google.golang.org/protobuf/proto"
23 "google.golang.org/protobuf/reflect/protoreflect"
24 "google.golang.org/protobuf/reflect/protoregistry"
25)
26
27// Unmarshal reads the given []byte into the given [proto.Message].
28// The provided message must be mutable (e.g., a non-nil pointer to a message).
29func Unmarshal(b []byte, m proto.Message) error {
30 return UnmarshalOptions{}.Unmarshal(b, m)
31}
32
33// UnmarshalOptions is a configurable JSON format parser.
34type UnmarshalOptions struct {
35 pragma.NoUnkeyedLiterals
36
37 // If AllowPartial is set, input for messages that will result in missing
38 // required fields will not return an error.
39 AllowPartial bool
40
41 // If DiscardUnknown is set, unknown fields and enum name values are ignored.
42 DiscardUnknown bool
43
44 // Resolver is used for looking up types when unmarshaling
45 // google.protobuf.Any messages or extension fields.
46 // If nil, this defaults to using protoregistry.GlobalTypes.
47 Resolver interface {
48 protoregistry.MessageTypeResolver
49 protoregistry.ExtensionTypeResolver
50 }
51
52 // RecursionLimit limits how deeply messages may be nested.
53 // If zero, a default limit is applied.
54 RecursionLimit int
55}
56
57// Unmarshal reads the given []byte and populates the given [proto.Message]
58// using options in the UnmarshalOptions object.
59// It will clear the message first before setting the fields.
60// If it returns an error, the given message may be partially set.
61// The provided message must be mutable (e.g., a non-nil pointer to a message).
62func (o UnmarshalOptions) Unmarshal(b []byte, m proto.Message) error {
63 return o.unmarshal(b, m)
64}
65
66// unmarshal is a centralized function that all unmarshal operations go through.
67// For profiling purposes, avoid changing the name of this function or
68// introducing other code paths for unmarshal that do not go through this.
69func (o UnmarshalOptions) unmarshal(b []byte, m proto.Message) error {
70 proto.Reset(m)
71
72 if o.Resolver == nil {
73 o.Resolver = protoregistry.GlobalTypes
74 }
75 if o.RecursionLimit == 0 {
76 o.RecursionLimit = protowire.DefaultRecursionLimit
77 }
78
79 dec := decoder{json.NewDecoder(b), o}
80 if err := dec.unmarshalMessage(m.ProtoReflect(), false); err != nil {
81 return err
82 }
83
84 // Check for EOF.
85 tok, err := dec.Read()
86 if err != nil {
87 return err
88 }
89 if tok.Kind() != json.EOF {
90 return dec.unexpectedTokenError(tok)
91 }
92
93 if o.AllowPartial {
94 return nil
95 }
96 return proto.CheckInitialized(m)
97}
98
99type decoder struct {
100 *json.Decoder
101 opts UnmarshalOptions
102}
103
104// newError returns an error object with position info.
105func (d decoder) newError(pos int, f string, x ...any) error {
106 line, column := d.Position(pos)
107 head := fmt.Sprintf("(line %d:%d): ", line, column)
108 return errors.New(head+f, x...)
109}
110
111// unexpectedTokenError returns a syntax error for the given unexpected token.
112func (d decoder) unexpectedTokenError(tok json.Token) error {
113 return d.syntaxError(tok.Pos(), "unexpected token %s", tok.RawString())
114}
115
116// syntaxError returns a syntax error for given position.
117func (d decoder) syntaxError(pos int, f string, x ...any) error {
118 line, column := d.Position(pos)
119 head := fmt.Sprintf("syntax error (line %d:%d): ", line, column)
120 return errors.New(head+f, x...)
121}
122
123// unmarshalMessage unmarshals a message into the given protoreflect.Message.
124func (d decoder) unmarshalMessage(m protoreflect.Message, skipTypeURL bool) error {
125 d.opts.RecursionLimit--
126 if d.opts.RecursionLimit < 0 {
127 return errors.New("exceeded max recursion depth")
128 }
129 if unmarshal := wellKnownTypeUnmarshaler(m.Descriptor().FullName()); unmarshal != nil {
130 return unmarshal(d, m)
131 }
132
133 tok, err := d.Read()
134 if err != nil {
135 return err
136 }
137 if tok.Kind() != json.ObjectOpen {
138 return d.unexpectedTokenError(tok)
139 }
140
141 messageDesc := m.Descriptor()
142 if !flags.ProtoLegacy && messageset.IsMessageSet(messageDesc) {
143 return errors.New("no support for proto1 MessageSets")
144 }
145
146 var seenNums set.Ints
147 var seenOneofs set.Ints
148 fieldDescs := messageDesc.Fields()
149 for {
150 // Read field name.
151 tok, err := d.Read()
152 if err != nil {
153 return err
154 }
155 switch tok.Kind() {
156 default:
157 return d.unexpectedTokenError(tok)
158 case json.ObjectClose:
159 return nil
160 case json.Name:
161 // Continue below.
162 }
163
164 name := tok.Name()
165 // Unmarshaling a non-custom embedded message in Any will contain the
166 // JSON field "@type" which should be skipped because it is not a field
167 // of the embedded message, but simply an artifact of the Any format.
168 if skipTypeURL && name == "@type" {
169 d.Read()
170 continue
171 }
172
173 // Get the FieldDescriptor.
174 var fd protoreflect.FieldDescriptor
175 if strings.HasPrefix(name, "[") && strings.HasSuffix(name, "]") {
176 // Only extension names are in [name] format.
177 extName := protoreflect.FullName(name[1 : len(name)-1])
178 extType, err := d.opts.Resolver.FindExtensionByName(extName)
179 if err != nil && err != protoregistry.NotFound {
180 return d.newError(tok.Pos(), "unable to resolve %s: %v", tok.RawString(), err)
181 }
182 if extType != nil {
183 fd = extType.TypeDescriptor()
184 if !messageDesc.ExtensionRanges().Has(fd.Number()) || fd.ContainingMessage().FullName() != messageDesc.FullName() {
185 return d.newError(tok.Pos(), "message %v cannot be extended by %v", messageDesc.FullName(), fd.FullName())
186 }
187 }
188 } else {
189 // The name can either be the JSON name or the proto field name.
190 fd = fieldDescs.ByJSONName(name)
191 if fd == nil {
192 fd = fieldDescs.ByTextName(name)
193 }
194 }
195
196 if fd == nil {
197 // Field is unknown.
198 if d.opts.DiscardUnknown {
199 if err := d.skipJSONValue(); err != nil {
200 return err
201 }
202 continue
203 }
204 return d.newError(tok.Pos(), "unknown field %v", tok.RawString())
205 }
206
207 // Do not allow duplicate fields.
208 num := uint64(fd.Number())
209 if seenNums.Has(num) {
210 return d.newError(tok.Pos(), "duplicate field %v", tok.RawString())
211 }
212 seenNums.Set(num)
213
214 // No need to set values for JSON null unless the field type is
215 // google.protobuf.Value or google.protobuf.NullValue.
216 if tok, _ := d.Peek(); tok.Kind() == json.Null && !isKnownValue(fd) && !isNullValue(fd) {
217 d.Read()
218 continue
219 }
220
221 switch {
222 case fd.IsList():
223 list := m.Mutable(fd).List()
224 if err := d.unmarshalList(list, fd); err != nil {
225 return err
226 }
227 case fd.IsMap():
228 mmap := m.Mutable(fd).Map()
229 if err := d.unmarshalMap(mmap, fd); err != nil {
230 return err
231 }
232 default:
233 // If field is a oneof, check if it has already been set.
234 if od := fd.ContainingOneof(); od != nil {
235 idx := uint64(od.Index())
236 if seenOneofs.Has(idx) {
237 return d.newError(tok.Pos(), "error parsing %s, oneof %v is already set", tok.RawString(), od.FullName())
238 }
239 seenOneofs.Set(idx)
240 }
241
242 // Required or optional fields.
243 if err := d.unmarshalSingular(m, fd); err != nil {
244 return err
245 }
246 }
247 }
248}
249
250func isKnownValue(fd protoreflect.FieldDescriptor) bool {
251 md := fd.Message()
252 return md != nil && md.FullName() == genid.Value_message_fullname
253}
254
255func isNullValue(fd protoreflect.FieldDescriptor) bool {
256 ed := fd.Enum()
257 return ed != nil && ed.FullName() == genid.NullValue_enum_fullname
258}
259
260// unmarshalSingular unmarshals to the non-repeated field specified
261// by the given FieldDescriptor.
262func (d decoder) unmarshalSingular(m protoreflect.Message, fd protoreflect.FieldDescriptor) error {
263 var val protoreflect.Value
264 var err error
265 switch fd.Kind() {
266 case protoreflect.MessageKind, protoreflect.GroupKind:
267 val = m.NewField(fd)
268 err = d.unmarshalMessage(val.Message(), false)
269 default:
270 val, err = d.unmarshalScalar(fd)
271 }
272
273 if err != nil {
274 return err
275 }
276 if val.IsValid() {
277 m.Set(fd, val)
278 }
279 return nil
280}
281
282// unmarshalScalar unmarshals to a scalar/enum protoreflect.Value specified by
283// the given FieldDescriptor.
284func (d decoder) unmarshalScalar(fd protoreflect.FieldDescriptor) (protoreflect.Value, error) {
285 const b32 int = 32
286 const b64 int = 64
287
288 tok, err := d.Read()
289 if err != nil {
290 return protoreflect.Value{}, err
291 }
292
293 kind := fd.Kind()
294 switch kind {
295 case protoreflect.BoolKind:
296 if tok.Kind() == json.Bool {
297 return protoreflect.ValueOfBool(tok.Bool()), nil
298 }
299
300 case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
301 if v, ok := unmarshalInt(tok, b32); ok {
302 return v, nil
303 }
304
305 case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
306 if v, ok := unmarshalInt(tok, b64); ok {
307 return v, nil
308 }
309
310 case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
311 if v, ok := unmarshalUint(tok, b32); ok {
312 return v, nil
313 }
314
315 case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
316 if v, ok := unmarshalUint(tok, b64); ok {
317 return v, nil
318 }
319
320 case protoreflect.FloatKind:
321 if v, ok := unmarshalFloat(tok, b32); ok {
322 return v, nil
323 }
324
325 case protoreflect.DoubleKind:
326 if v, ok := unmarshalFloat(tok, b64); ok {
327 return v, nil
328 }
329
330 case protoreflect.StringKind:
331 if tok.Kind() == json.String {
332 return protoreflect.ValueOfString(tok.ParsedString()), nil
333 }
334
335 case protoreflect.BytesKind:
336 if v, ok := unmarshalBytes(tok); ok {
337 return v, nil
338 }
339
340 case protoreflect.EnumKind:
341 if v, ok := unmarshalEnum(tok, fd, d.opts.DiscardUnknown); ok {
342 return v, nil
343 }
344
345 default:
346 panic(fmt.Sprintf("unmarshalScalar: invalid scalar kind %v", kind))
347 }
348
349 return protoreflect.Value{}, d.newError(tok.Pos(), "invalid value for %v field %v: %v", kind, fd.JSONName(), tok.RawString())
350}
351
352func unmarshalInt(tok json.Token, bitSize int) (protoreflect.Value, bool) {
353 switch tok.Kind() {
354 case json.Number:
355 return getInt(tok, bitSize)
356
357 case json.String:
358 // Decode number from string.
359 s := strings.TrimSpace(tok.ParsedString())
360 if len(s) != len(tok.ParsedString()) {
361 return protoreflect.Value{}, false
362 }
363 dec := json.NewDecoder([]byte(s))
364 tok, err := dec.Read()
365 if err != nil {
366 return protoreflect.Value{}, false
367 }
368 return getInt(tok, bitSize)
369 }
370 return protoreflect.Value{}, false
371}
372
373func getInt(tok json.Token, bitSize int) (protoreflect.Value, bool) {
374 n, ok := tok.Int(bitSize)
375 if !ok {
376 return protoreflect.Value{}, false
377 }
378 if bitSize == 32 {
379 return protoreflect.ValueOfInt32(int32(n)), true
380 }
381 return protoreflect.ValueOfInt64(n), true
382}
383
384func unmarshalUint(tok json.Token, bitSize int) (protoreflect.Value, bool) {
385 switch tok.Kind() {
386 case json.Number:
387 return getUint(tok, bitSize)
388
389 case json.String:
390 // Decode number from string.
391 s := strings.TrimSpace(tok.ParsedString())
392 if len(s) != len(tok.ParsedString()) {
393 return protoreflect.Value{}, false
394 }
395 dec := json.NewDecoder([]byte(s))
396 tok, err := dec.Read()
397 if err != nil {
398 return protoreflect.Value{}, false
399 }
400 return getUint(tok, bitSize)
401 }
402 return protoreflect.Value{}, false
403}
404
405func getUint(tok json.Token, bitSize int) (protoreflect.Value, bool) {
406 n, ok := tok.Uint(bitSize)
407 if !ok {
408 return protoreflect.Value{}, false
409 }
410 if bitSize == 32 {
411 return protoreflect.ValueOfUint32(uint32(n)), true
412 }
413 return protoreflect.ValueOfUint64(n), true
414}
415
416func unmarshalFloat(tok json.Token, bitSize int) (protoreflect.Value, bool) {
417 switch tok.Kind() {
418 case json.Number:
419 return getFloat(tok, bitSize)
420
421 case json.String:
422 s := tok.ParsedString()
423 switch s {
424 case "NaN":
425 if bitSize == 32 {
426 return protoreflect.ValueOfFloat32(float32(math.NaN())), true
427 }
428 return protoreflect.ValueOfFloat64(math.NaN()), true
429 case "Infinity":
430 if bitSize == 32 {
431 return protoreflect.ValueOfFloat32(float32(math.Inf(+1))), true
432 }
433 return protoreflect.ValueOfFloat64(math.Inf(+1)), true
434 case "-Infinity":
435 if bitSize == 32 {
436 return protoreflect.ValueOfFloat32(float32(math.Inf(-1))), true
437 }
438 return protoreflect.ValueOfFloat64(math.Inf(-1)), true
439 }
440
441 // Decode number from string.
442 if len(s) != len(strings.TrimSpace(s)) {
443 return protoreflect.Value{}, false
444 }
445 dec := json.NewDecoder([]byte(s))
446 tok, err := dec.Read()
447 if err != nil {
448 return protoreflect.Value{}, false
449 }
450 return getFloat(tok, bitSize)
451 }
452 return protoreflect.Value{}, false
453}
454
455func getFloat(tok json.Token, bitSize int) (protoreflect.Value, bool) {
456 n, ok := tok.Float(bitSize)
457 if !ok {
458 return protoreflect.Value{}, false
459 }
460 if bitSize == 32 {
461 return protoreflect.ValueOfFloat32(float32(n)), true
462 }
463 return protoreflect.ValueOfFloat64(n), true
464}
465
466func unmarshalBytes(tok json.Token) (protoreflect.Value, bool) {
467 if tok.Kind() != json.String {
468 return protoreflect.Value{}, false
469 }
470
471 s := tok.ParsedString()
472 enc := base64.StdEncoding
473 if strings.ContainsAny(s, "-_") {
474 enc = base64.URLEncoding
475 }
476 if len(s)%4 != 0 {
477 enc = enc.WithPadding(base64.NoPadding)
478 }
479 b, err := enc.DecodeString(s)
480 if err != nil {
481 return protoreflect.Value{}, false
482 }
483 return protoreflect.ValueOfBytes(b), true
484}
485
486func unmarshalEnum(tok json.Token, fd protoreflect.FieldDescriptor, discardUnknown bool) (protoreflect.Value, bool) {
487 switch tok.Kind() {
488 case json.String:
489 // Lookup EnumNumber based on name.
490 s := tok.ParsedString()
491 if enumVal := fd.Enum().Values().ByName(protoreflect.Name(s)); enumVal != nil {
492 return protoreflect.ValueOfEnum(enumVal.Number()), true
493 }
494 if discardUnknown {
495 return protoreflect.Value{}, true
496 }
497
498 case json.Number:
499 if n, ok := tok.Int(32); ok {
500 return protoreflect.ValueOfEnum(protoreflect.EnumNumber(n)), true
501 }
502
503 case json.Null:
504 // This is only valid for google.protobuf.NullValue.
505 if isNullValue(fd) {
506 return protoreflect.ValueOfEnum(0), true
507 }
508 }
509
510 return protoreflect.Value{}, false
511}
512
513func (d decoder) unmarshalList(list protoreflect.List, fd protoreflect.FieldDescriptor) error {
514 tok, err := d.Read()
515 if err != nil {
516 return err
517 }
518 if tok.Kind() != json.ArrayOpen {
519 return d.unexpectedTokenError(tok)
520 }
521
522 switch fd.Kind() {
523 case protoreflect.MessageKind, protoreflect.GroupKind:
524 for {
525 tok, err := d.Peek()
526 if err != nil {
527 return err
528 }
529
530 if tok.Kind() == json.ArrayClose {
531 d.Read()
532 return nil
533 }
534
535 val := list.NewElement()
536 if err := d.unmarshalMessage(val.Message(), false); err != nil {
537 return err
538 }
539 list.Append(val)
540 }
541 default:
542 for {
543 tok, err := d.Peek()
544 if err != nil {
545 return err
546 }
547
548 if tok.Kind() == json.ArrayClose {
549 d.Read()
550 return nil
551 }
552
553 val, err := d.unmarshalScalar(fd)
554 if err != nil {
555 return err
556 }
557 if val.IsValid() {
558 list.Append(val)
559 }
560 }
561 }
562
563 return nil
564}
565
566func (d decoder) unmarshalMap(mmap protoreflect.Map, fd protoreflect.FieldDescriptor) error {
567 tok, err := d.Read()
568 if err != nil {
569 return err
570 }
571 if tok.Kind() != json.ObjectOpen {
572 return d.unexpectedTokenError(tok)
573 }
574
575 // Determine ahead whether map entry is a scalar type or a message type in
576 // order to call the appropriate unmarshalMapValue func inside the for loop
577 // below.
578 var unmarshalMapValue func() (protoreflect.Value, error)
579 switch fd.MapValue().Kind() {
580 case protoreflect.MessageKind, protoreflect.GroupKind:
581 unmarshalMapValue = func() (protoreflect.Value, error) {
582 val := mmap.NewValue()
583 if err := d.unmarshalMessage(val.Message(), false); err != nil {
584 return protoreflect.Value{}, err
585 }
586 return val, nil
587 }
588 default:
589 unmarshalMapValue = func() (protoreflect.Value, error) {
590 return d.unmarshalScalar(fd.MapValue())
591 }
592 }
593
594Loop:
595 for {
596 // Read field name.
597 tok, err := d.Read()
598 if err != nil {
599 return err
600 }
601 switch tok.Kind() {
602 default:
603 return d.unexpectedTokenError(tok)
604 case json.ObjectClose:
605 break Loop
606 case json.Name:
607 // Continue.
608 }
609
610 // Unmarshal field name.
611 pkey, err := d.unmarshalMapKey(tok, fd.MapKey())
612 if err != nil {
613 return err
614 }
615
616 // Check for duplicate field name.
617 if mmap.Has(pkey) {
618 return d.newError(tok.Pos(), "duplicate map key %v", tok.RawString())
619 }
620
621 // Read and unmarshal field value.
622 pval, err := unmarshalMapValue()
623 if err != nil {
624 return err
625 }
626 if pval.IsValid() {
627 mmap.Set(pkey, pval)
628 }
629 }
630
631 return nil
632}
633
634// unmarshalMapKey converts given token of Name kind into a protoreflect.MapKey.
635// A map key type is any integral or string type.
636func (d decoder) unmarshalMapKey(tok json.Token, fd protoreflect.FieldDescriptor) (protoreflect.MapKey, error) {
637 const b32 = 32
638 const b64 = 64
639 const base10 = 10
640
641 name := tok.Name()
642 kind := fd.Kind()
643 switch kind {
644 case protoreflect.StringKind:
645 return protoreflect.ValueOfString(name).MapKey(), nil
646
647 case protoreflect.BoolKind:
648 switch name {
649 case "true":
650 return protoreflect.ValueOfBool(true).MapKey(), nil
651 case "false":
652 return protoreflect.ValueOfBool(false).MapKey(), nil
653 }
654
655 case protoreflect.Int32Kind, protoreflect.Sint32Kind, protoreflect.Sfixed32Kind:
656 if n, err := strconv.ParseInt(name, base10, b32); err == nil {
657 return protoreflect.ValueOfInt32(int32(n)).MapKey(), nil
658 }
659
660 case protoreflect.Int64Kind, protoreflect.Sint64Kind, protoreflect.Sfixed64Kind:
661 if n, err := strconv.ParseInt(name, base10, b64); err == nil {
662 return protoreflect.ValueOfInt64(int64(n)).MapKey(), nil
663 }
664
665 case protoreflect.Uint32Kind, protoreflect.Fixed32Kind:
666 if n, err := strconv.ParseUint(name, base10, b32); err == nil {
667 return protoreflect.ValueOfUint32(uint32(n)).MapKey(), nil
668 }
669
670 case protoreflect.Uint64Kind, protoreflect.Fixed64Kind:
671 if n, err := strconv.ParseUint(name, base10, b64); err == nil {
672 return protoreflect.ValueOfUint64(uint64(n)).MapKey(), nil
673 }
674
675 default:
676 panic(fmt.Sprintf("invalid kind for map key: %v", kind))
677 }
678
679 return protoreflect.MapKey{}, d.newError(tok.Pos(), "invalid value for %v key: %s", kind, tok.RawString())
680}