1package logfmt
2
3import (
4 "bufio"
5 "bytes"
6 "fmt"
7 "io"
8 "unicode/utf8"
9)
10
// A Decoder reads and decodes logfmt records from an input stream.
//
// Records are consumed one at a time with ScanRecord; the key/value pairs of
// the current record are then walked with ScanKeyval and read back through
// Key and Value.
type Decoder struct {
	// pos is the byte offset into the current record at which the next
	// ScanKeyval call resumes.
	pos int
	// key is the key found by the most recent ScanKeyval call, or nil.
	key []byte
	// value is the value found by the most recent ScanKeyval call, or nil.
	value []byte
	// lineNum counts the records scanned so far and is reported as the
	// Line of any SyntaxError.
	lineNum int
	// s supplies the bytes of the current record.
	s *bufio.Scanner
	// err is the error that stopped decoding; while it is non-nil both
	// ScanRecord and ScanKeyval return false.
	err error
}
20
21// NewDecoder returns a new decoder that reads from r.
22//
23// The decoder introduces its own buffering and may read data from r beyond
24// the logfmt records requested.
25func NewDecoder(r io.Reader) *Decoder {
26 dec := &Decoder{
27 s: bufio.NewScanner(r),
28 }
29 return dec
30}
31
32// NewDecoderSize returns a new decoder that reads from r.
33//
34// The decoder introduces its own buffering and may read data from r beyond
35// the logfmt records requested.
36// The size argument specifies the size of the initial buffer that the
37// Decoder will use to read records from r.
38// If a log line is longer than the size argument, the Decoder will return
39// a bufio.ErrTooLong error.
40func NewDecoderSize(r io.Reader, size int) *Decoder {
41 scanner := bufio.NewScanner(r)
42 scanner.Buffer(make([]byte, 0, size), size)
43 dec := &Decoder{
44 s: scanner,
45 }
46 return dec
47}
48
49// ScanRecord advances the Decoder to the next record, which can then be
50// parsed with the ScanKeyval method. It returns false when decoding stops,
51// either by reaching the end of the input or an error. After ScanRecord
52// returns false, the Err method will return any error that occurred during
53// decoding, except that if it was io.EOF, Err will return nil.
54func (dec *Decoder) ScanRecord() bool {
55 if dec.err != nil {
56 return false
57 }
58 if !dec.s.Scan() {
59 dec.err = dec.s.Err()
60 return false
61 }
62 dec.lineNum++
63 dec.pos = 0
64 return true
65}
66
// ScanKeyval advances the Decoder to the next key/value pair of the current
// record, which can then be retrieved with the Key and Value methods. It
// returns false when decoding stops, either by reaching the end of the
// current record or an error.
//
// The previous key and value are cleared at the start of every call. A key
// that is not followed by '=' and a key followed by '=' with nothing after it
// are both reported with a nil value; an empty quoted value ("") is also
// reported as nil. Syntax problems are recorded as a *SyntaxError, after
// which the call returns false.
func (dec *Decoder) ScanKeyval() bool {
	dec.key, dec.value = nil, nil
	if dec.err != nil {
		return false
	}

	line := dec.s.Bytes()

	// Skip garbage: every byte up to and including ' ' (spaces and control
	// characters) that precedes the next key.
	for p, c := range line[dec.pos:] {
		if c > ' ' {
			dec.pos += p
			goto key
		}
	}
	// Nothing but garbage remained: the record is exhausted.
	dec.pos = len(line)
	return false

key:
	const invalidKeyError = "invalid key"

	// Scan the key. multibyte notes whether any byte >= utf8.RuneSelf was
	// seen, so the RuneError check only runs for keys that are not pure
	// ASCII.
	start, multibyte := dec.pos, false
	for p, c := range line[dec.pos:] {
		switch {
		case c == '=':
			// End of key; a value may follow.
			dec.pos += p
			if dec.pos > start {
				dec.key = line[start:dec.pos]
				if multibyte && bytes.ContainsRune(dec.key, utf8.RuneError) {
					dec.syntaxError(invalidKeyError)
					return false
				}
			}
			// An '=' with no key bytes in front of it is an error.
			if dec.key == nil {
				dec.unexpectedByte(c)
				return false
			}
			goto equal
		case c == '"':
			// Quotes are not allowed inside a key.
			dec.pos += p
			dec.unexpectedByte(c)
			return false
		case c <= ' ':
			// Key terminated by a space or control byte: a key with no value.
			dec.pos += p
			if dec.pos > start {
				dec.key = line[start:dec.pos]
				if multibyte && bytes.ContainsRune(dec.key, utf8.RuneError) {
					dec.syntaxError(invalidKeyError)
					return false
				}
			}
			return true
		case c >= utf8.RuneSelf:
			multibyte = true
		}
	}
	// The key ran to the end of the line: a key with no value.
	dec.pos = len(line)
	if dec.pos > start {
		dec.key = line[start:dec.pos]
		if multibyte && bytes.ContainsRune(dec.key, utf8.RuneError) {
			dec.syntaxError(invalidKeyError)
			return false
		}
	}
	return true

equal:
	// Step over the '=' and decide what kind of value follows, if any.
	dec.pos++
	if dec.pos >= len(line) {
		return true
	}
	switch c := line[dec.pos]; {
	case c <= ' ':
		// "key=" followed by a space or control byte: the value stays nil.
		return true
	case c == '"':
		goto qvalue
	}

	// Unquoted value: runs until the next byte <= ' ' or the end of the line.
	start = dec.pos
	for p, c := range line[dec.pos:] {
		switch {
		case c == '=' || c == '"':
			// Neither '=' nor '"' may appear inside an unquoted value.
			dec.pos += p
			dec.unexpectedByte(c)
			return false
		case c <= ' ':
			dec.pos += p
			if dec.pos > start {
				dec.value = line[start:dec.pos]
			}
			return true
		}
	}
	dec.pos = len(line)
	if dec.pos > start {
		dec.value = line[start:dec.pos]
	}
	return true

qvalue:
	const (
		untermQuote  = "unterminated quoted value"
		invalidQuote = "invalid quoted value"
	)

	// Quoted value: dec.pos is on the opening quote. esc is true while the
	// current byte follows a backslash; hasEsc records whether any backslash
	// was seen at all.
	hasEsc, esc := false, false
	start = dec.pos
	for p, c := range line[dec.pos+1:] {
		switch {
		case esc:
			// The byte after a backslash never terminates the value.
			esc = false
		case c == '\\':
			hasEsc, esc = true, true
		case c == '"':
			// Closing quote: move pos just past it.
			dec.pos += p + 2
			if hasEsc {
				// Hand the text, quotes included, to unquoteBytes (defined
				// outside this function; presumably it decodes the escape
				// sequences).
				v, ok := unquoteBytes(line[start:dec.pos])
				if !ok {
					dec.syntaxError(invalidQuote)
					return false
				}
				dec.value = v
			} else {
				// No escapes: the value is the bytes between the quotes,
				// taken directly from the line. An empty pair of quotes
				// leaves the value nil.
				start++
				end := dec.pos - 1
				if end > start {
					dec.value = line[start:end]
				}
			}
			return true
		}
	}
	// The line ended before a closing quote was found.
	dec.pos = len(line)
	dec.syntaxError(untermQuote)
	return false
}
208
// Key returns the most recent key found by a call to ScanKeyval, or nil if
// that call found no key. The returned slice may point to internal buffers
// and is only valid until the next call to ScanRecord. It does no allocation.
func (dec *Decoder) Key() []byte {
	return dec.key
}
215
// Value returns the most recent value found by a call to ScanKeyval, or nil
// if the pair had no value. The returned slice may point to internal buffers
// and is only valid until the next call to ScanRecord. It does no allocation
// when the value has no escape sequences.
func (dec *Decoder) Value() []byte {
	return dec.value
}
223
// Err returns the first non-EOF error that was encountered by the Decoder.
// This is either an error reported by the underlying scanner or a
// *SyntaxError recorded while parsing a record; it is nil if no error has
// been recorded.
func (dec *Decoder) Err() error {
	return dec.err
}
228
229func (dec *Decoder) syntaxError(msg string) {
230 dec.err = &SyntaxError{
231 Msg: msg,
232 Line: dec.lineNum,
233 Pos: dec.pos + 1,
234 }
235}
236
237func (dec *Decoder) unexpectedByte(c byte) {
238 dec.err = &SyntaxError{
239 Msg: fmt.Sprintf("unexpected %q", c),
240 Line: dec.lineNum,
241 Pos: dec.pos + 1,
242 }
243}
244
// A SyntaxError represents a syntax error in the logfmt input stream.
type SyntaxError struct {
	// Msg describes what was wrong with the input.
	Msg string
	// Line is the number of the record in which the error was found.
	Line int
	// Pos is the position within that record at which the error was found.
	Pos int
}

// Error implements the error interface, formatting the position, line and
// message of the syntax error into a single string.
func (e *SyntaxError) Error() string {
	const format = "logfmt syntax error at pos %d on line %d: %s"
	return fmt.Sprintf(format, e.Pos, e.Line, e.Msg)
}