decode.go

  1package logfmt
  2
  3import (
  4	"bufio"
  5	"bytes"
  6	"fmt"
  7	"io"
  8	"unicode/utf8"
  9)
 10
// A Decoder reads and decodes logfmt records from an input stream.
//
// Records are obtained with ScanRecord and the key/value pairs within the
// current record with ScanKeyval.
type Decoder struct {
	// pos is the byte offset within the current record at which scanning
	// resumes. ScanRecord resets it to zero and ScanKeyval advances it.
	pos int
	// key holds the key found by the most recent ScanKeyval call, or nil.
	key []byte
	// value holds the value found by the most recent ScanKeyval call, or nil.
	value []byte
	// lineNum counts the records scanned so far; it is reported as the Line
	// of any SyntaxError.
	lineNum int
	// s is the scanner that supplies records from the underlying reader.
	s *bufio.Scanner
	// err is the error that stopped decoding, if any. While it is non-nil,
	// ScanRecord and ScanKeyval return false.
	err error
}
 20
 21// NewDecoder returns a new decoder that reads from r.
 22//
 23// The decoder introduces its own buffering and may read data from r beyond
 24// the logfmt records requested.
 25func NewDecoder(r io.Reader) *Decoder {
 26	dec := &Decoder{
 27		s: bufio.NewScanner(r),
 28	}
 29	return dec
 30}
 31
 32// NewDecoderSize returns a new decoder that reads from r.
 33//
 34// The decoder introduces its own buffering and may read data from r beyond
 35// the logfmt records requested.
 36// The size argument specifies the size of the initial buffer that the
 37// Decoder will use to read records from r.
 38// If a log line is longer than the size argument, the Decoder will return
 39// a bufio.ErrTooLong error.
 40func NewDecoderSize(r io.Reader, size int) *Decoder {
 41	scanner := bufio.NewScanner(r)
 42	scanner.Buffer(make([]byte, 0, size), size)
 43	dec := &Decoder{
 44		s: scanner,
 45	}
 46	return dec
 47}
 48
 49// ScanRecord advances the Decoder to the next record, which can then be
 50// parsed with the ScanKeyval method. It returns false when decoding stops,
 51// either by reaching the end of the input or an error. After ScanRecord
 52// returns false, the Err method will return any error that occurred during
 53// decoding, except that if it was io.EOF, Err will return nil.
 54func (dec *Decoder) ScanRecord() bool {
 55	if dec.err != nil {
 56		return false
 57	}
 58	if !dec.s.Scan() {
 59		dec.err = dec.s.Err()
 60		return false
 61	}
 62	dec.lineNum++
 63	dec.pos = 0
 64	return true
 65}
 66
// ScanKeyval advances the Decoder to the next key/value pair of the current
// record, which can then be retrieved with the Key and Value methods. It
// returns false when decoding stops, either by reaching the end of the
// current record or an error.
//
// The previous key and value are cleared on entry. A key that has no value,
// or whose value is empty, is reported with a nil value. Syntax errors are
// recorded on the Decoder and cause this and later calls to return false.
func (dec *Decoder) ScanKeyval() bool {
	dec.key, dec.value = nil, nil
	if dec.err != nil {
		return false
	}

	// The record being parsed is the scanner's current token.
	line := dec.s.Bytes()

	// Skip leading bytes that cannot start a key: spaces and anything else
	// at or below ' '. If nothing else remains, the record is exhausted.
	for p, c := range line[dec.pos:] {
		if c > ' ' {
			dec.pos += p
			goto key
		}
	}
	dec.pos = len(line)
	return false

key:
	const invalidKeyError = "invalid key"

	// Scan the key. It ends at '=', at a byte <= ' ', or at the end of the
	// record. multibyte records whether any byte >= utf8.RuneSelf was seen,
	// so the utf8.RuneError check only runs for keys that need it.
	start, multibyte := dec.pos, false
	for p, c := range line[dec.pos:] {
		switch {
		case c == '=':
			dec.pos += p
			if dec.pos > start {
				dec.key = line[start:dec.pos]
				if multibyte && bytes.ContainsRune(dec.key, utf8.RuneError) {
					dec.syntaxError(invalidKeyError)
					return false
				}
			}
			// An '=' with no key bytes before it leaves dec.key nil.
			if dec.key == nil {
				dec.unexpectedByte(c)
				return false
			}
			goto equal
		case c == '"':
			// A quote is not accepted inside a key.
			dec.pos += p
			dec.unexpectedByte(c)
			return false
		case c <= ' ':
			// The key ended without an '=': report it with a nil value.
			dec.pos += p
			if dec.pos > start {
				dec.key = line[start:dec.pos]
				if multibyte && bytes.ContainsRune(dec.key, utf8.RuneError) {
					dec.syntaxError(invalidKeyError)
					return false
				}
			}
			return true
		case c >= utf8.RuneSelf:
			multibyte = true
		}
	}
	// The key ran to the end of the record: report it with a nil value.
	dec.pos = len(line)
	if dec.pos > start {
		dec.key = line[start:dec.pos]
		if multibyte && bytes.ContainsRune(dec.key, utf8.RuneError) {
			dec.syntaxError(invalidKeyError)
			return false
		}
	}
	return true

equal:
	// Step over the '='. If the record ends here or the next byte is <= ' ',
	// the value is empty and stays nil.
	dec.pos++
	if dec.pos >= len(line) {
		return true
	}
	switch c := line[dec.pos]; {
	case c <= ' ':
		return true
	case c == '"':
		goto qvalue
	}

	// Unquoted value: it runs until a byte <= ' ' or the end of the record.
	// An '=' or '"' inside it is a syntax error.
	start = dec.pos
	for p, c := range line[dec.pos:] {
		switch {
		case c == '=' || c == '"':
			dec.pos += p
			dec.unexpectedByte(c)
			return false
		case c <= ' ':
			dec.pos += p
			if dec.pos > start {
				dec.value = line[start:dec.pos]
			}
			return true
		}
	}
	dec.pos = len(line)
	if dec.pos > start {
		dec.value = line[start:dec.pos]
	}
	return true

qvalue:
	const (
		untermQuote  = "unterminated quoted value"
		invalidQuote = "invalid quoted value"
	)

	// Quoted value: dec.pos is at the opening quote. esc is true while the
	// current byte follows a backslash, so that byte is not interpreted;
	// hasEsc remembers whether any backslash was seen at all.
	hasEsc, esc := false, false
	start = dec.pos
	for p, c := range line[dec.pos+1:] {
		switch {
		case esc:
			esc = false
		case c == '\\':
			hasEsc, esc = true, true
		case c == '"':
			// p is relative to dec.pos+1, so this leaves dec.pos just past
			// the closing quote.
			dec.pos += p + 2
			if hasEsc {
				// The whole quoted span, quotes included, is handed to
				// unquoteBytes (defined elsewhere; presumably it decodes the
				// escape sequences — confirm against its definition).
				v, ok := unquoteBytes(line[start:dec.pos])
				if !ok {
					dec.syntaxError(invalidQuote)
					return false
				}
				dec.value = v
			} else {
				// No escapes: the value is the bytes between the quotes,
				// left nil when there are none.
				start++
				end := dec.pos - 1
				if end > start {
					dec.value = line[start:end]
				}
			}
			return true
		}
	}
	// The record ended before a closing quote was found.
	dec.pos = len(line)
	dec.syntaxError(untermQuote)
	return false
}
208
// Key returns the most recent key found by a call to ScanKeyval, or nil if
// there is none. The returned slice may point to internal buffers and is only
// valid until the next call to ScanRecord. It does no allocation.
func (dec *Decoder) Key() []byte {
	return dec.key
}
215
// Value returns the most recent value found by a call to ScanKeyval, or nil
// if the key had no value or the value was empty. The returned slice may
// point to internal buffers and is only valid until the next call to
// ScanRecord. It does no allocation when the value has no escape sequences.
func (dec *Decoder) Value() []byte {
	return dec.value
}
223
// Err returns the first non-EOF error that was encountered by the Decoder.
// This is either an error reported by the underlying scanner or a
// *SyntaxError recorded while parsing a record.
func (dec *Decoder) Err() error {
	return dec.err
}
228
229func (dec *Decoder) syntaxError(msg string) {
230	dec.err = &SyntaxError{
231		Msg:  msg,
232		Line: dec.lineNum,
233		Pos:  dec.pos + 1,
234	}
235}
236
237func (dec *Decoder) unexpectedByte(c byte) {
238	dec.err = &SyntaxError{
239		Msg:  fmt.Sprintf("unexpected %q", c),
240		Line: dec.lineNum,
241		Pos:  dec.pos + 1,
242	}
243}
244
// SyntaxError describes a malformed construct found in the logfmt input
// stream.
type SyntaxError struct {
	// Msg describes what was wrong.
	Msg string
	// Line is the number of the record in which the problem was found.
	Line int
	// Pos is the position within that record at which the problem was found.
	Pos int
}

// Error implements the error interface, formatting the position, the line
// and the message into a single description.
func (e *SyntaxError) Error() string {
	const format = "logfmt syntax error at pos %d on line %d: %s"
	return fmt.Sprintf(format, e.Pos, e.Line, e.Msg)
}