1package jsonparser
2
3import (
4 "bytes"
5 "errors"
6 "fmt"
7 "strconv"
8)
9
// Sentinel errors returned by the lookup and parsing functions in this file.
var (
	// KeyPathNotFoundError is returned when the requested key path cannot be located.
	KeyPathNotFoundError = errors.New("Key path not found")
	// UnknownValueTypeError is returned when a bare token is not a recognised literal or number.
	UnknownValueTypeError = errors.New("Unknown value type")
	// MalformedJsonError is returned when an expected token is missing or out of place.
	MalformedJsonError = errors.New("Malformed JSON error")
	// MalformedStringError is returned when a string value has no closing quote.
	MalformedStringError = errors.New("Value is string, but can't find closing '\"' symbol")
	// MalformedArrayError is returned when an array has no closing bracket or a bad separator.
	MalformedArrayError = errors.New("Value is array, but can't find closing ']' symbol")
	// MalformedObjectError is returned when an object has no closing brace or a bad entry.
	MalformedObjectError = errors.New("Value looks like object, but can't find closing '}' symbol")
	// MalformedValueError is returned when a number, boolean or string value cannot be parsed.
	MalformedValueError = errors.New("Value looks like Number/Boolean/None, but can't find its end: ',' or '}' symbol")
	// OverflowIntegerError is returned by ParseInt when the number overflowed while parsing.
	OverflowIntegerError = errors.New("Value is number, but overflowed while parsing")
	// MalformedStringEscapeError is returned when an object key contains an invalid escape sequence.
	MalformedStringEscapeError = errors.New("Encountered an invalid escape sequence in a string")
)
22
// unescapeStackBufSize is how much stack space to allocate for unescaping JSON
// strings; if a string longer than this needs to be unescaped, it will result
// in a heap allocation.
const unescapeStackBufSize = 64
26
// tokenEnd returns the index of the first byte in data that ends a bare token
// (a space, newline, carriage return, tab, ',', '}' or ']'). When no such byte
// is present it returns len(data).
func tokenEnd(data []byte) int {
	for pos := 0; pos < len(data); pos++ {
		b := data[pos]
		if b == ' ' || b == '\n' || b == '\r' || b == '\t' || b == ',' || b == '}' || b == ']' {
			return pos
		}
	}

	return len(data)
}
37
// findTokenStart scans data backwards and returns the index of the last
// occurrence of token. The scan gives up and returns 0 as soon as it meets an
// opening '[' or '{' (checked after the token itself), or when data is
// exhausted.
func findTokenStart(data []byte, token byte) int {
	pos := len(data) - 1
	for pos >= 0 {
		b := data[pos]
		if b == token {
			return pos
		}
		if b == '[' || b == '{' {
			return 0
		}
		pos--
	}

	return 0
}
50
51func findKeyStart(data []byte, key string) (int, error) {
52 i := 0
53 ln := len(data)
54 if ln > 0 && (data[0] == '{' || data[0] == '[') {
55 i = 1
56 }
57 var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
58
59 if ku, err := Unescape(StringToBytes(key), stackbuf[:]); err == nil {
60 key = bytesToString(&ku)
61 }
62
63 for i < ln {
64 switch data[i] {
65 case '"':
66 i++
67 keyBegin := i
68
69 strEnd, keyEscaped := stringEnd(data[i:])
70 if strEnd == -1 {
71 break
72 }
73 i += strEnd
74 keyEnd := i - 1
75
76 valueOffset := nextToken(data[i:])
77 if valueOffset == -1 {
78 break
79 }
80
81 i += valueOffset
82
83 // if string is a key, and key level match
84 k := data[keyBegin:keyEnd]
85 // for unescape: if there are no escape sequences, this is cheap; if there are, it is a
86 // bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
87 if keyEscaped {
88 if ku, err := Unescape(k, stackbuf[:]); err != nil {
89 break
90 } else {
91 k = ku
92 }
93 }
94
95 if data[i] == ':' && len(key) == len(k) && bytesToString(&k) == key {
96 return keyBegin - 1, nil
97 }
98
99 case '[':
100 end := blockEnd(data[i:], data[i], ']')
101 if end != -1 {
102 i = i + end
103 }
104 case '{':
105 end := blockEnd(data[i:], data[i], '}')
106 if end != -1 {
107 i = i + end
108 }
109 }
110 i++
111 }
112
113 return -1, KeyPathNotFoundError
114}
115
// tokenStart scans data backwards and returns the index of the last newline,
// carriage return, tab, ',', '{' or '[' byte. It returns 0 when none is found.
func tokenStart(data []byte) int {
	pos := len(data) - 1
	for pos >= 0 {
		b := data[pos]
		if b == '\n' || b == '\r' || b == '\t' || b == ',' || b == '{' || b == '[' {
			return pos
		}
		pos--
	}

	return 0
}
126
// nextToken returns the index of the first byte in data that is not a space,
// newline, carriage return or tab, or -1 if data holds only whitespace.
func nextToken(data []byte) int {
	for pos := 0; pos < len(data); pos++ {
		if b := data[pos]; b != ' ' && b != '\n' && b != '\r' && b != '\t' {
			return pos
		}
	}

	return -1
}
140
// lastToken returns the index of the last byte in data that is not a space,
// newline, carriage return or tab, or -1 if data holds only whitespace.
func lastToken(data []byte) int {
	pos := len(data) - 1
	for pos >= 0 {
		if b := data[pos]; b != ' ' && b != '\n' && b != '\r' && b != '\t' {
			return pos
		}
		pos--
	}

	return -1
}
154
// stringEnd locates the closing quote of a JSON string whose opening quote has
// already been consumed. It returns the index just past the closing quote and
// whether any backslash was seen before it. A quote preceded by an odd number
// of backslashes is escaped and therefore skipped. If no closing quote exists
// the index is -1 and the flag still reports whether a backslash was seen.
func stringEnd(data []byte) (int, bool) {
	sawBackslash := false
	for pos := 0; pos < len(data); pos++ {
		switch data[pos] {
		case '\\':
			sawBackslash = true
		case '"':
			if !sawBackslash {
				return pos + 1, false
			}
			// Count the run of backslashes directly before this quote.
			run := 0
			for k := pos - 1; k >= 0 && data[k] == '\\'; k-- {
				run++
			}
			if run%2 == 0 {
				return pos + 1, true // even number of backslashes: the quote is real
			}
			// odd number of backslashes: the quote is escaped, keep looking
		}
	}

	return -1, sawBackslash
}
184
185// Find end of the data structure, array or object.
186// For array openSym and closeSym will be '[' and ']', for object '{' and '}'
187func blockEnd(data []byte, openSym byte, closeSym byte) int {
188 level := 0
189 i := 0
190 ln := len(data)
191
192 for i < ln {
193 switch data[i] {
194 case '"': // If inside string, skip it
195 se, _ := stringEnd(data[i+1:])
196 if se == -1 {
197 return -1
198 }
199 i += se
200 case openSym: // If open symbol, increase level
201 level++
202 case closeSym: // If close symbol, increase level
203 level--
204
205 // If we have returned to the original level, we're done
206 if level == 0 {
207 return i + 1
208 }
209 }
210 i++
211 }
212
213 return -1
214}
215
// searchKeys walks data looking for the value addressed by the key path keys.
//
// Path elements are object keys, or array indexes written as "[N]". It returns
// the offset just past the ':' following the last matched key (for an
// array-index element, the offset derived from the selected array element),
// 0 when no keys are given, and -1 when the path is not found or the data
// looks malformed.
func searchKeys(data []byte, keys ...string) int {
	keyLevel := 0 // number of leading path elements matched so far
	level := 0    // current object nesting depth
	i := 0
	ln := len(data)
	lk := len(keys)
	lastMatched := true // whether the most recently compared key matched its path element

	if lk == 0 {
		return 0
	}

	var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings

	for i < ln {
		switch data[i] {
		case '"':
			i++
			keyBegin := i

			strEnd, keyEscaped := stringEnd(data[i:])
			if strEnd == -1 {
				return -1
			}
			i += strEnd
			keyEnd := i - 1

			valueOffset := nextToken(data[i:])
			if valueOffset == -1 {
				return -1
			}

			i += valueOffset

			// if string is a key
			if data[i] == ':' {
				// A key outside of any object means the data is malformed.
				if level < 1 {
					return -1
				}

				key := data[keyBegin:keyEnd]

				// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
				// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
				var keyUnesc []byte
				if !keyEscaped {
					keyUnesc = key
				} else if ku, err := Unescape(key, stackbuf[:]); err != nil {
					return -1
				} else {
					keyUnesc = ku
				}

				if level <= len(keys) {
					if equalStr(&keyUnesc, keys[level-1]) {
						lastMatched = true

						// if key level match
						if keyLevel == level-1 {
							keyLevel++
							// If we found all keys in path
							if keyLevel == lk {
								return i + 1
							}
						}
					} else {
						lastMatched = false
					}
				} else {
					// Nested deeper than the path is long: give up.
					return -1
				}
			} else {
				// The string was a value, not a key. Step back one byte so the
				// i++ at the bottom of the loop lands on the token after it.
				i--
			}
		case '{':

			// Only descend into this object (increase the level) when its parent key matched;
			// otherwise jump straight to the end of the block.
			if !lastMatched {
				end := blockEnd(data[i:], '{', '}')
				if end == -1 {
					return -1
				}
				// blockEnd returns the index just past '}'; the -1 compensates for the i++ below.
				i += end - 1
			} else {
				level++
			}
		case '}':
			level--
			if level == keyLevel {
				keyLevel--
			}
		case '[':
			// If we want to get array element by index
			// NOTE(review): keys[level][0] is read without a length check, so an
			// empty path element reaching this point would panic — confirm callers never pass one.
			if keyLevel == level && keys[level][0] == '[' {
				var keyLen = len(keys[level])
				if keyLen < 3 || keys[level][0] != '[' || keys[level][keyLen-1] != ']' {
					return -1
				}
				aIdx, err := strconv.Atoi(keys[level][1 : keyLen-1])
				if err != nil {
					return -1
				}
				var curIdx int
				var valueFound []byte
				var valueOffset int
				var curI = i
				// Walk the array and remember the element at index aIdx. The error
				// returned by ArrayEach is not inspected here.
				ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
					if curIdx == aIdx {
						valueFound = value
						valueOffset = offset
						if dataType == String {
							// String values arrive without their quotes; move the offset back
							// and widen the slice by two bytes so the quotes are included.
							valueOffset = valueOffset - 2
							valueFound = data[curI+valueOffset : curI+valueOffset+len(value)+2]
						}
					}
					curIdx += 1
				})

				if valueFound == nil {
					return -1
				} else {
					// Resolve the rest of the path inside the selected element.
					subIndex := searchKeys(valueFound, keys[level+1:]...)
					if subIndex < 0 {
						return -1
					}
					return i + valueOffset + subIndex
				}
			} else {
				// Do not search for keys inside arrays
				if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
					return -1
				} else {
					i += arraySkip - 1
				}
			}
		case ':': // If encountered, JSON data is malformed
			return -1
		}

		i++
	}

	return -1
}
361
// sameTree reports whether p1 and p2 agree on every element of their common
// prefix, i.e. over the length of the shorter of the two paths. An empty path
// is therefore in the same tree as any other path.
func sameTree(p1, p2 []string) bool {
	shared := len(p2)
	if len(p1) < shared {
		shared = len(p1)
	}

	for idx := 0; idx < shared; idx++ {
		if p1[idx] != p2[idx] {
			return false
		}
	}

	return true
}
376
// EachKey scans data once and looks up several key paths in the same pass.
//
// For every path in paths that is found, cb is called with the index of that
// path in paths, the value, its type and any error reported by Get. When a key
// is met outside of any object, or an array is met at a negative nesting
// level, cb is called once with index -1 and MalformedJsonError.
//
// It returns the scan offset at the moment the last outstanding path was
// matched, or -1 if the data was exhausted or malformed before that happened.
func EachKey(data []byte, cb func(int, []byte, ValueType, error), paths ...[]string) int {
	var x struct{}                        // placeholder value for the arrIdxFlags set
	pathFlags := make([]bool, len(paths)) // pathFlags[pi] is set once paths[pi] has been handled
	var level, pathsMatched, i int
	ln := len(data)

	// The longest path bounds how deep keys need to be tracked.
	var maxPath int
	for _, p := range paths {
		if len(p) > maxPath {
			maxPath = len(p)
		}
	}

	// pathsBuf[n] holds the most recently seen key at nesting level n+1.
	pathsBuf := make([]string, maxPath)

	for i < ln {
		switch data[i] {
		case '"':
			i++
			keyBegin := i

			strEnd, keyEscaped := stringEnd(data[i:])
			if strEnd == -1 {
				return -1
			}
			i += strEnd

			keyEnd := i - 1

			valueOffset := nextToken(data[i:])
			if valueOffset == -1 {
				return -1
			}

			i += valueOffset

			// if string is a key, and key level match
			if data[i] == ':' {
				match := -1
				key := data[keyBegin:keyEnd]

				// for unescape: if there are no escape sequences, this is cheap; if there are, it is a
				// bit more expensive, but causes no allocations unless len(key) > unescapeStackBufSize
				var keyUnesc []byte
				if !keyEscaped {
					keyUnesc = key
				} else {
					var stackbuf [unescapeStackBufSize]byte
					if ku, err := Unescape(key, stackbuf[:]); err != nil {
						return -1
					} else {
						keyUnesc = ku
					}
				}

				// Keys nested deeper than the longest path cannot match anything.
				if maxPath >= level {
					if level < 1 {
						cb(-1, nil, Unknown, MalformedJsonError)
						return -1
					}

					pathsBuf[level-1] = bytesToString(&keyUnesc)
					for pi, p := range paths {
						// Skip paths of a different depth, paths already handled, paths whose
						// element at this level differs, and paths whose parents differ.
						if len(p) != level || pathFlags[pi] || !equalStr(&keyUnesc, p[level-1]) || !sameTree(p, pathsBuf[:level]) {
							continue
						}

						match = pi

						pathsMatched++
						pathFlags[pi] = true

						v, dt, _, e := Get(data[i+1:])
						cb(pi, v, dt, e)

						if pathsMatched == len(paths) {
							break
						}
					}
					if pathsMatched == len(paths) {
						return i
					}
				}

				if match == -1 {
					// NOTE(review): nextToken may return -1 and blockEnd may return -1;
					// neither result is checked before being added to i — confirm this is safe.
					tokenOffset := nextToken(data[i+1:])
					i += tokenOffset

					if data[i] == '{' {
						blockSkip := blockEnd(data[i:], '{', '}')
						i += blockSkip + 1
					}
				}

				// Step back so the i++ at the bottom of the loop re-examines a structural byte.
				if i < ln {
					switch data[i] {
					case '{', '}', '[', '"':
						i--
					}
				}
			} else {
				// The string was a value, not a key; step back to compensate for the i++ below.
				i--
			}
		case '{':
			level++
		case '}':
			level--
		case '[':
			var ok bool
			arrIdxFlags := make(map[int]struct{}) // set of array indexes requested at this level
			pIdxFlags := make([]bool, len(paths)) // which paths requested an index at this level

			if level < 0 {
				cb(-1, nil, Unknown, MalformedJsonError)
				return -1
			}

			// Collect the paths whose element at this level is an array index "[N]".
			// NOTE(review): p[level][0] is read without a length check, and the Atoi
			// error is discarded (a non-numeric index is treated as 0) — confirm intended.
			for pi, p := range paths {
				if len(p) < level+1 || pathFlags[pi] || p[level][0] != '[' || !sameTree(p, pathsBuf[:level]) {
					continue
				}
				if len(p[level]) >= 2 {
					aIdx, _ := strconv.Atoi(p[level][1 : len(p[level])-1])
					arrIdxFlags[aIdx] = x
					pIdxFlags[pi] = true
				}
			}

			if len(arrIdxFlags) > 0 {
				level++

				var curIdx int
				arrOff, _ := ArrayEach(data[i:], func(value []byte, dataType ValueType, offset int, err error) {
					if _, ok = arrIdxFlags[curIdx]; ok {
						for pi, p := range paths {
							if pIdxFlags[pi] {
								aIdx, _ := strconv.Atoi(p[level-1][1 : len(p[level-1])-1])

								if curIdx == aIdx {
									// Resolve the remainder of the path inside this element.
									of := searchKeys(value, p[level:]...)

									// The path is marked as handled even when the remainder
									// is not found; cb is only invoked when it is found.
									pathsMatched++
									pathFlags[pi] = true

									if of != -1 {
										v, dt, _, e := Get(value[of:])
										cb(pi, v, dt, e)
									}
								}
							}
						}
					}

					curIdx += 1
				})

				if pathsMatched == len(paths) {
					return i
				}

				// Continue after the array; the -1 compensates for the i++ below.
				i += arrOff - 1
			} else {
				// Do not search for keys inside arrays
				if arraySkip := blockEnd(data[i:], '[', ']'); arraySkip == -1 {
					return -1
				} else {
					i += arraySkip - 1
				}
			}
		case ']':
			level--
		}

		i++
	}

	return -1
}
555
// ValueType enumerates the kinds of value that can be found in JSON data.
type ValueType int

// The possible ValueType values. NotExist is the zero value.
const (
	NotExist ValueType = iota
	String
	Number
	Object
	Array
	Boolean
	Null
	Unknown
)

// String returns a lower-case, human-readable name for vt. Unknown and any
// value outside the declared range are rendered as "unknown".
func (vt ValueType) String() string {
	names := [...]string{
		NotExist: "non-existent",
		String:   "string",
		Number:   "number",
		Object:   "object",
		Array:    "array",
		Boolean:  "boolean",
		Null:     "null",
	}

	if vt < 0 || int(vt) >= len(names) {
		return "unknown"
	}
	return names[vt]
}
590
// Byte forms of the JSON literals, compared against bare tokens when
// classifying and parsing values.
var (
	trueLiteral  = []byte("true")
	falseLiteral = []byte("false")
	nullLiteral  = []byte("null")
)
596
597func createInsertComponent(keys []string, setValue []byte, comma, object bool) []byte {
598 isIndex := string(keys[0][0]) == "["
599 offset := 0
600 lk := calcAllocateSpace(keys, setValue, comma, object)
601 buffer := make([]byte, lk, lk)
602 if comma {
603 offset += WriteToBuffer(buffer[offset:], ",")
604 }
605 if isIndex && !comma {
606 offset += WriteToBuffer(buffer[offset:], "[")
607 } else {
608 if object {
609 offset += WriteToBuffer(buffer[offset:], "{")
610 }
611 if !isIndex {
612 offset += WriteToBuffer(buffer[offset:], "\"")
613 offset += WriteToBuffer(buffer[offset:], keys[0])
614 offset += WriteToBuffer(buffer[offset:], "\":")
615 }
616 }
617
618 for i := 1; i < len(keys); i++ {
619 if string(keys[i][0]) == "[" {
620 offset += WriteToBuffer(buffer[offset:], "[")
621 } else {
622 offset += WriteToBuffer(buffer[offset:], "{\"")
623 offset += WriteToBuffer(buffer[offset:], keys[i])
624 offset += WriteToBuffer(buffer[offset:], "\":")
625 }
626 }
627 offset += WriteToBuffer(buffer[offset:], string(setValue))
628 for i := len(keys) - 1; i > 0; i-- {
629 if string(keys[i][0]) == "[" {
630 offset += WriteToBuffer(buffer[offset:], "]")
631 } else {
632 offset += WriteToBuffer(buffer[offset:], "}")
633 }
634 }
635 if isIndex && !comma {
636 offset += WriteToBuffer(buffer[offset:], "]")
637 }
638 if object && !isIndex {
639 offset += WriteToBuffer(buffer[offset:], "}")
640 }
641 return buffer
642}
643
// calcAllocateSpace returns the number of bytes createInsertComponent needs to
// render keys and setValue with the given comma and object options. keys must
// be non-empty and contain no empty element.
func calcAllocateSpace(keys []string, setValue []byte, comma, object bool) int {
	firstIsIndex := keys[0][0] == '['
	total := len(setValue)

	if comma {
		total++ // ,
	}

	switch {
	case firstIsIndex && !comma:
		total += 2 // []
	default:
		if object {
			total++ // {
		}
		if !firstIsIndex {
			total += len(keys[0]) + 3 // "keys[0]":
		}
	}

	for _, k := range keys[1:] {
		if k[0] == '[' {
			total += 2 // []
		} else {
			total += len(k) + 5 // {"k": and the matching }
		}
	}

	if object && !firstIsIndex {
		total++ // }
	}

	return total
}
684
// WriteToBuffer copies str into the start of buffer and returns len(str).
// The return value is the length of str even when buffer is too short to
// hold all of it.
func WriteToBuffer(buffer []byte, str string) int {
	written := len(str)
	copy(buffer, str)
	return written
}
689
/*

Delete - Receives existing data structure, path to delete.

The last path element may be an object key or an array index written as "[N]".
When no keys are given an empty slice of data is returned; when the path cannot
be found data is returned unchanged. Otherwise the result is built in a copy,
so the bytes of the original data are left untouched.

Returns:
`data` - return modified data

*/
func Delete(data []byte, keys ...string) []byte {
	lk := len(keys)
	if lk == 0 {
		return data[:0]
	}

	// A last path element starting with '[' addresses an array item.
	array := false
	if len(keys[lk-1]) > 0 && string(keys[lk-1][0]) == "[" {
		array = true
	}

	var startOffset, keyOffset int
	endOffset := len(data)
	var err error
	if !array {
		// Narrow the search window to the parent value, if there is a parent path.
		if len(keys) > 1 {
			_, _, startOffset, endOffset, err = internalGet(data, keys[:lk-1]...)
			if err == KeyPathNotFoundError {
				// parent path not found: nothing to delete
				return data
			}
		}

		// Find where the key itself begins inside the parent.
		keyOffset, err = findKeyStart(data[startOffset:endOffset], keys[lk-1])
		if err == KeyPathNotFoundError {
			// key not found: nothing to delete
			return data
		}
		keyOffset += startOffset
		// Find where the key's value ends; the error from this lookup is ignored.
		_, _, _, subEndOffset, _ := internalGet(data[startOffset:endOffset], keys[lk-1])
		endOffset = startOffset + subEndOffset
		tokEnd := tokenEnd(data[endOffset:])
		tokStart := findTokenStart(data[:keyOffset], ","[0])

		// Decide which neighbouring comma goes away together with the entry.
		// NOTE(review): data[endOffset+tokEnd] is indexed without a bounds check;
		// tokenEnd returns the slice length when it finds no terminator — confirm
		// the value being deleted is always followed by one.
		if data[endOffset+tokEnd] == ","[0] {
			// A comma follows the value: remove it too.
			endOffset += tokEnd + 1
		} else if data[endOffset+tokEnd] == " "[0] && len(data) > endOffset+tokEnd+1 && data[endOffset+tokEnd+1] == ","[0] {
			// A space and then a comma follow the value: remove both.
			endOffset += tokEnd + 2
		} else if data[endOffset+tokEnd] == "}"[0] && data[tokStart] == ","[0] {
			// Last entry of the object: remove the comma in front of the key instead.
			keyOffset = tokStart
		}
	} else {
		_, _, keyOffset, endOffset, err = internalGet(data, keys...)
		if err == KeyPathNotFoundError {
			// array element not found: nothing to delete
			return data
		}

		tokEnd := tokenEnd(data[endOffset:])
		tokStart := findTokenStart(data[:keyOffset], ","[0])

		if data[endOffset+tokEnd] == ","[0] {
			// A comma follows the element: remove it too.
			endOffset += tokEnd + 1
		} else if data[endOffset+tokEnd] == "]"[0] && data[tokStart] == ","[0] {
			// Last element of the array: remove the comma in front of it instead.
			keyOffset = tokStart
		}
	}

	// We need to remove remaining trailing comma if we delete last element in the object
	prevTok := lastToken(data[:keyOffset])
	remainedValue := data[endOffset:]

	var newOffset int
	if nextToken(remainedValue) > -1 && remainedValue[nextToken(remainedValue)] == '}' && data[prevTok] == ',' {
		newOffset = prevTok
	} else {
		newOffset = prevTok + 1
	}

	// We have to make a copy here if we don't want to mangle the original data, because byte slices are
	// accessed by reference and not by value
	dataCopy := make([]byte, len(data))
	copy(dataCopy, data)
	data = append(dataCopy[:newOffset], dataCopy[endOffset:]...)

	return data
}
775
776/*
777
778Set - Receives existing data structure, path to set, and data to set at that key.
779
780Returns:
781`value` - modified byte array
782`err` - On any parsing error
783
784*/
785func Set(data []byte, setValue []byte, keys ...string) (value []byte, err error) {
786 // ensure keys are set
787 if len(keys) == 0 {
788 return nil, KeyPathNotFoundError
789 }
790
791 _, _, startOffset, endOffset, err := internalGet(data, keys...)
792 if err != nil {
793 if err != KeyPathNotFoundError {
794 // problem parsing the data
795 return nil, err
796 }
797 // full path doesnt exist
798 // does any subpath exist?
799 var depth int
800 for i := range keys {
801 _, _, start, end, sErr := internalGet(data, keys[:i+1]...)
802 if sErr != nil {
803 break
804 } else {
805 endOffset = end
806 startOffset = start
807 depth++
808 }
809 }
810 comma := true
811 object := false
812 if endOffset == -1 {
813 firstToken := nextToken(data)
814 // We can't set a top-level key if data isn't an object
815 if firstToken < 0 || data[firstToken] != '{' {
816 return nil, KeyPathNotFoundError
817 }
818 // Don't need a comma if the input is an empty object
819 secondToken := firstToken + 1 + nextToken(data[firstToken+1:])
820 if data[secondToken] == '}' {
821 comma = false
822 }
823 // Set the top level key at the end (accounting for any trailing whitespace)
824 // This assumes last token is valid like '}', could check and return error
825 endOffset = lastToken(data)
826 }
827 depthOffset := endOffset
828 if depth != 0 {
829 // if subpath is a non-empty object, add to it
830 // or if subpath is a non-empty array, add to it
831 if (data[startOffset] == '{' && data[startOffset+1+nextToken(data[startOffset+1:])] != '}') ||
832 (data[startOffset] == '[' && data[startOffset+1+nextToken(data[startOffset+1:])] == '{') && keys[depth:][0][0] == 91 {
833 depthOffset--
834 startOffset = depthOffset
835 // otherwise, over-write it with a new object
836 } else {
837 comma = false
838 object = true
839 }
840 } else {
841 startOffset = depthOffset
842 }
843 value = append(data[:startOffset], append(createInsertComponent(keys[depth:], setValue, comma, object), data[depthOffset:]...)...)
844 } else {
845 // path currently exists
846 startComponent := data[:startOffset]
847 endComponent := data[endOffset:]
848
849 value = make([]byte, len(startComponent)+len(endComponent)+len(setValue))
850 newEndOffset := startOffset + len(setValue)
851 copy(value[0:startOffset], startComponent)
852 copy(value[startOffset:newEndOffset], setValue)
853 copy(value[newEndOffset:], endComponent)
854 }
855 return value, nil
856}
857
858func getType(data []byte, offset int) ([]byte, ValueType, int, error) {
859 var dataType ValueType
860 endOffset := offset
861
862 // if string value
863 if data[offset] == '"' {
864 dataType = String
865 if idx, _ := stringEnd(data[offset+1:]); idx != -1 {
866 endOffset += idx + 1
867 } else {
868 return nil, dataType, offset, MalformedStringError
869 }
870 } else if data[offset] == '[' { // if array value
871 dataType = Array
872 // break label, for stopping nested loops
873 endOffset = blockEnd(data[offset:], '[', ']')
874
875 if endOffset == -1 {
876 return nil, dataType, offset, MalformedArrayError
877 }
878
879 endOffset += offset
880 } else if data[offset] == '{' { // if object value
881 dataType = Object
882 // break label, for stopping nested loops
883 endOffset = blockEnd(data[offset:], '{', '}')
884
885 if endOffset == -1 {
886 return nil, dataType, offset, MalformedObjectError
887 }
888
889 endOffset += offset
890 } else {
891 // Number, Boolean or None
892 end := tokenEnd(data[endOffset:])
893
894 if end == -1 {
895 return nil, dataType, offset, MalformedValueError
896 }
897
898 value := data[offset : endOffset+end]
899
900 switch data[offset] {
901 case 't', 'f': // true or false
902 if bytes.Equal(value, trueLiteral) || bytes.Equal(value, falseLiteral) {
903 dataType = Boolean
904 } else {
905 return nil, Unknown, offset, UnknownValueTypeError
906 }
907 case 'u', 'n': // undefined or null
908 if bytes.Equal(value, nullLiteral) {
909 dataType = Null
910 } else {
911 return nil, Unknown, offset, UnknownValueTypeError
912 }
913 case '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', '-':
914 dataType = Number
915 default:
916 return nil, Unknown, offset, UnknownValueTypeError
917 }
918
919 endOffset += end
920 }
921 return data[offset:endOffset], dataType, endOffset, nil
922}
923
924/*
925Get - Receives data structure, and key path to extract value from.
926
927Returns:
928`value` - Pointer to original data structure containing key value, or just empty slice if nothing found or error
929`dataType` - Can be: `NotExist`, `String`, `Number`, `Object`, `Array`, `Boolean` or `Null`
930`offset` - Offset from provided data structure where key value ends. Used mostly internally, for example for `ArrayEach` helper.
931`err` - If key not found or any other parsing issue it should return error. If key not found it also sets `dataType` to `NotExist`
932
933Accept multiple keys to specify path to JSON value (in case of quering nested structures).
934If no keys provided it will try to extract closest JSON value (simple ones or object/array), useful for reading streams or arrays, see `ArrayEach` implementation.
935*/
936func Get(data []byte, keys ...string) (value []byte, dataType ValueType, offset int, err error) {
937 a, b, _, d, e := internalGet(data, keys...)
938 return a, b, d, e
939}
940
941func internalGet(data []byte, keys ...string) (value []byte, dataType ValueType, offset, endOffset int, err error) {
942 if len(keys) > 0 {
943 if offset = searchKeys(data, keys...); offset == -1 {
944 return nil, NotExist, -1, -1, KeyPathNotFoundError
945 }
946 }
947
948 // Go to closest value
949 nO := nextToken(data[offset:])
950 if nO == -1 {
951 return nil, NotExist, offset, -1, MalformedJsonError
952 }
953
954 offset += nO
955 value, dataType, endOffset, err = getType(data, offset)
956 if err != nil {
957 return value, dataType, offset, endOffset, err
958 }
959
960 // Strip quotes from string values
961 if dataType == String {
962 value = value[1 : len(value)-1]
963 }
964
965 return value[:len(value):len(value)], dataType, offset, endOffset, nil
966}
967
968// ArrayEach is used when iterating arrays, accepts a callback function with the same return arguments as `Get`.
969func ArrayEach(data []byte, cb func(value []byte, dataType ValueType, offset int, err error), keys ...string) (offset int, err error) {
970 if len(data) == 0 {
971 return -1, MalformedObjectError
972 }
973
974 nT := nextToken(data)
975 if nT == -1 {
976 return -1, MalformedJsonError
977 }
978
979 offset = nT + 1
980
981 if len(keys) > 0 {
982 if offset = searchKeys(data, keys...); offset == -1 {
983 return offset, KeyPathNotFoundError
984 }
985
986 // Go to closest value
987 nO := nextToken(data[offset:])
988 if nO == -1 {
989 return offset, MalformedJsonError
990 }
991
992 offset += nO
993
994 if data[offset] != '[' {
995 return offset, MalformedArrayError
996 }
997
998 offset++
999 }
1000
1001 nO := nextToken(data[offset:])
1002 if nO == -1 {
1003 return offset, MalformedJsonError
1004 }
1005
1006 offset += nO
1007
1008 if data[offset] == ']' {
1009 return offset, nil
1010 }
1011
1012 for true {
1013 v, t, o, e := Get(data[offset:])
1014
1015 if e != nil {
1016 return offset, e
1017 }
1018
1019 if o == 0 {
1020 break
1021 }
1022
1023 if t != NotExist {
1024 cb(v, t, offset+o-len(v), e)
1025 }
1026
1027 if e != nil {
1028 break
1029 }
1030
1031 offset += o
1032
1033 skipToToken := nextToken(data[offset:])
1034 if skipToToken == -1 {
1035 return offset, MalformedArrayError
1036 }
1037 offset += skipToToken
1038
1039 if data[offset] == ']' {
1040 break
1041 }
1042
1043 if data[offset] != ',' {
1044 return offset, MalformedArrayError
1045 }
1046
1047 offset++
1048 }
1049
1050 return offset, nil
1051}
1052
1053// ObjectEach iterates over the key-value pairs of a JSON object, invoking a given callback for each such entry
1054func ObjectEach(data []byte, callback func(key []byte, value []byte, dataType ValueType, offset int) error, keys ...string) (err error) {
1055 offset := 0
1056
1057 // Descend to the desired key, if requested
1058 if len(keys) > 0 {
1059 if off := searchKeys(data, keys...); off == -1 {
1060 return KeyPathNotFoundError
1061 } else {
1062 offset = off
1063 }
1064 }
1065
1066 // Validate and skip past opening brace
1067 if off := nextToken(data[offset:]); off == -1 {
1068 return MalformedObjectError
1069 } else if offset += off; data[offset] != '{' {
1070 return MalformedObjectError
1071 } else {
1072 offset++
1073 }
1074
1075 // Skip to the first token inside the object, or stop if we find the ending brace
1076 if off := nextToken(data[offset:]); off == -1 {
1077 return MalformedJsonError
1078 } else if offset += off; data[offset] == '}' {
1079 return nil
1080 }
1081
1082 // Loop pre-condition: data[offset] points to what should be either the next entry's key, or the closing brace (if it's anything else, the JSON is malformed)
1083 for offset < len(data) {
1084 // Step 1: find the next key
1085 var key []byte
1086
1087 // Check what the the next token is: start of string, end of object, or something else (error)
1088 switch data[offset] {
1089 case '"':
1090 offset++ // accept as string and skip opening quote
1091 case '}':
1092 return nil // we found the end of the object; stop and return success
1093 default:
1094 return MalformedObjectError
1095 }
1096
1097 // Find the end of the key string
1098 var keyEscaped bool
1099 if off, esc := stringEnd(data[offset:]); off == -1 {
1100 return MalformedJsonError
1101 } else {
1102 key, keyEscaped = data[offset:offset+off-1], esc
1103 offset += off
1104 }
1105
1106 // Unescape the string if needed
1107 if keyEscaped {
1108 var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
1109 if keyUnescaped, err := Unescape(key, stackbuf[:]); err != nil {
1110 return MalformedStringEscapeError
1111 } else {
1112 key = keyUnescaped
1113 }
1114 }
1115
1116 // Step 2: skip the colon
1117 if off := nextToken(data[offset:]); off == -1 {
1118 return MalformedJsonError
1119 } else if offset += off; data[offset] != ':' {
1120 return MalformedJsonError
1121 } else {
1122 offset++
1123 }
1124
1125 // Step 3: find the associated value, then invoke the callback
1126 if value, valueType, off, err := Get(data[offset:]); err != nil {
1127 return err
1128 } else if err := callback(key, value, valueType, offset+off); err != nil { // Invoke the callback here!
1129 return err
1130 } else {
1131 offset += off
1132 }
1133
1134 // Step 4: skip over the next comma to the following token, or stop if we hit the ending brace
1135 if off := nextToken(data[offset:]); off == -1 {
1136 return MalformedArrayError
1137 } else {
1138 offset += off
1139 switch data[offset] {
1140 case '}':
1141 return nil // Stop if we hit the close brace
1142 case ',':
1143 offset++ // Ignore the comma
1144 default:
1145 return MalformedObjectError
1146 }
1147 }
1148
1149 // Skip to the next token after the comma
1150 if off := nextToken(data[offset:]); off == -1 {
1151 return MalformedArrayError
1152 } else {
1153 offset += off
1154 }
1155 }
1156
1157 return MalformedObjectError // we shouldn't get here; it's expected that we will return via finding the ending brace
1158}
1159
1160// GetUnsafeString returns the value retrieved by `Get`, use creates string without memory allocation by mapping string to slice memory. It does not handle escape symbols.
1161func GetUnsafeString(data []byte, keys ...string) (val string, err error) {
1162 v, _, _, e := Get(data, keys...)
1163
1164 if e != nil {
1165 return "", e
1166 }
1167
1168 return bytesToString(&v), nil
1169}
1170
1171// GetString returns the value retrieved by `Get`, cast to a string if possible, trying to properly handle escape and utf8 symbols
1172// If key data type do not match, it will return an error.
1173func GetString(data []byte, keys ...string) (val string, err error) {
1174 v, t, _, e := Get(data, keys...)
1175
1176 if e != nil {
1177 return "", e
1178 }
1179
1180 if t != String {
1181 return "", fmt.Errorf("Value is not a string: %s", string(v))
1182 }
1183
1184 // If no escapes return raw content
1185 if bytes.IndexByte(v, '\\') == -1 {
1186 return string(v), nil
1187 }
1188
1189 return ParseString(v)
1190}
1191
1192// GetFloat returns the value retrieved by `Get`, cast to a float64 if possible.
1193// The offset is the same as in `Get`.
1194// If key data type do not match, it will return an error.
1195func GetFloat(data []byte, keys ...string) (val float64, err error) {
1196 v, t, _, e := Get(data, keys...)
1197
1198 if e != nil {
1199 return 0, e
1200 }
1201
1202 if t != Number {
1203 return 0, fmt.Errorf("Value is not a number: %s", string(v))
1204 }
1205
1206 return ParseFloat(v)
1207}
1208
1209// GetInt returns the value retrieved by `Get`, cast to a int64 if possible.
1210// If key data type do not match, it will return an error.
1211func GetInt(data []byte, keys ...string) (val int64, err error) {
1212 v, t, _, e := Get(data, keys...)
1213
1214 if e != nil {
1215 return 0, e
1216 }
1217
1218 if t != Number {
1219 return 0, fmt.Errorf("Value is not a number: %s", string(v))
1220 }
1221
1222 return ParseInt(v)
1223}
1224
1225// GetBoolean returns the value retrieved by `Get`, cast to a bool if possible.
1226// The offset is the same as in `Get`.
1227// If key data type do not match, it will return error.
1228func GetBoolean(data []byte, keys ...string) (val bool, err error) {
1229 v, t, _, e := Get(data, keys...)
1230
1231 if e != nil {
1232 return false, e
1233 }
1234
1235 if t != Boolean {
1236 return false, fmt.Errorf("Value is not a boolean: %s", string(v))
1237 }
1238
1239 return ParseBoolean(v)
1240}
1241
1242// ParseBoolean parses a Boolean ValueType into a Go bool (not particularly useful, but here for completeness)
1243func ParseBoolean(b []byte) (bool, error) {
1244 switch {
1245 case bytes.Equal(b, trueLiteral):
1246 return true, nil
1247 case bytes.Equal(b, falseLiteral):
1248 return false, nil
1249 default:
1250 return false, MalformedValueError
1251 }
1252}
1253
1254// ParseString parses a String ValueType into a Go string (the main parsing work is unescaping the JSON string)
1255func ParseString(b []byte) (string, error) {
1256 var stackbuf [unescapeStackBufSize]byte // stack-allocated array for allocation-free unescaping of small strings
1257 if bU, err := Unescape(b, stackbuf[:]); err != nil {
1258 return "", MalformedValueError
1259 } else {
1260 return string(bU), nil
1261 }
1262}
1263
1264// ParseNumber parses a Number ValueType into a Go float64
1265func ParseFloat(b []byte) (float64, error) {
1266 if v, err := parseFloat(&b); err != nil {
1267 return 0, MalformedValueError
1268 } else {
1269 return v, nil
1270 }
1271}
1272
1273// ParseInt parses a Number ValueType into a Go int64
1274func ParseInt(b []byte) (int64, error) {
1275 if v, ok, overflow := parseInt(b); !ok {
1276 if overflow {
1277 return 0, OverflowIntegerError
1278 }
1279 return 0, MalformedValueError
1280 } else {
1281 return v, nil
1282 }
1283}