1package ansi
2
3import (
4 "unicode/utf8"
5 "unsafe"
6
7 "github.com/charmbracelet/x/ansi/parser"
8)
9
// Parser represents a DEC ANSI compatible sequence parser.
//
// It uses a state machine to parse ANSI escape sequences and control
// characters. The parser is designed to be used with a terminal emulator or
// similar application that needs to parse ANSI escape sequences and control
// characters.
// See package [parser] for more information.
//
//go:generate go run ./gen.go
type Parser struct {
	// handler holds the optional callbacks that are invoked when a printable
	// character, a control code, or a complete sequence is dispatched. A nil
	// callback is simply skipped.
	handler Handler

	// params contains the raw parameters of the sequence.
	// These parameters are used when constructing CSI and DCS sequences.
	params []int

	// data contains the raw data of the sequence.
	// This data is used when constructing OSC, DCS, SOS, PM, and APC
	// sequences.
	data []byte

	// dataLen keeps track of the number of bytes collected in the data
	// buffer.
	// If dataLen is -1, the data buffer is unlimited and will grow as needed;
	// in that mode the collected bytes are tracked by len(data) instead.
	// Otherwise, dataLen is limited by the size of the data buffer.
	dataLen int

	// paramsLen keeps track of the number of parameters.
	// This is limited by the size of the params buffer.
	//
	// This is also used when collecting UTF-8 runes to keep track of the
	// number of rune bytes collected.
	paramsLen int

	// cmd contains the raw command along with the private prefix and
	// intermediate bytes of the sequence.
	// The first lower byte contains the command byte, the next byte contains
	// the private prefix, and the next byte contains the intermediate byte.
	//
	// This is also used when collecting UTF-8 runes, treating it as a slice
	// of 4 bytes where the first collected byte occupies the lowest 8 bits.
	cmd int

	// state is the current state of the parser.
	state byte
}
54
55// NewParser returns a new parser with the default settings.
56// The [Parser] uses a default size of 32 for the parameters and 64KB for the
57// data buffer. Use [Parser.SetParamsSize] and [Parser.SetDataSize] to set the
58// size of the parameters and data buffer respectively.
59func NewParser() *Parser {
60 p := new(Parser)
61 p.SetParamsSize(parser.MaxParamsSize)
62 p.SetDataSize(1024 * 64) // 64KB data buffer
63 return p
64}
65
66// SetParamsSize sets the size of the parameters buffer.
67// This is used when constructing CSI and DCS sequences.
68func (p *Parser) SetParamsSize(size int) {
69 p.params = make([]int, size)
70}
71
72// SetDataSize sets the size of the data buffer.
73// This is used when constructing OSC, DCS, SOS, PM, and APC sequences.
74// If size is less than or equal to 0, the data buffer is unlimited and will
75// grow as needed.
76func (p *Parser) SetDataSize(size int) {
77 if size <= 0 {
78 size = 0
79 p.dataLen = -1
80 }
81 p.data = make([]byte, size)
82}
83
84// Params returns the list of parsed packed parameters.
85func (p *Parser) Params() Params {
86 return unsafe.Slice((*Param)(unsafe.Pointer(&p.params[0])), p.paramsLen)
87}
88
89// Param returns the parameter at the given index and falls back to the default
90// value if the parameter is missing. If the index is out of bounds, it returns
91// the default value and false.
92func (p *Parser) Param(i, def int) (int, bool) {
93 if i < 0 || i >= p.paramsLen {
94 return def, false
95 }
96 return Param(p.params[i]).Param(def), true
97}
98
99// Command returns the packed command of the last dispatched sequence. Use
100// [Cmd] to unpack the command.
101func (p *Parser) Command() int {
102 return p.cmd
103}
104
105// Rune returns the last dispatched sequence as a rune.
106func (p *Parser) Rune() rune {
107 rw := utf8ByteLen(byte(p.cmd & 0xff))
108 if rw == -1 {
109 return utf8.RuneError
110 }
111 r, _ := utf8.DecodeRune((*[utf8.UTFMax]byte)(unsafe.Pointer(&p.cmd))[:rw])
112 return r
113}
114
115// Control returns the last dispatched sequence as a control code.
116func (p *Parser) Control() byte {
117 return byte(p.cmd & 0xff)
118}
119
120// Data returns the raw data of the last dispatched sequence.
121func (p *Parser) Data() []byte {
122 return p.data[:p.dataLen]
123}
124
125// Reset resets the parser to its initial state.
126func (p *Parser) Reset() {
127 p.clear()
128 p.state = parser.GroundState
129}
130
131// clear clears the parser parameters and command.
132func (p *Parser) clear() {
133 if len(p.params) > 0 {
134 p.params[0] = parser.MissingParam
135 }
136 p.paramsLen = 0
137 p.cmd = 0
138}
139
140// State returns the current state of the parser.
141func (p *Parser) State() parser.State {
142 return p.state
143}
144
145// StateName returns the name of the current state.
146func (p *Parser) StateName() string {
147 return parser.StateNames[p.state]
148}
149
150// Parse parses the given dispatcher and byte buffer.
151// Deprecated: Loop over the buffer and call [Parser.Advance] instead.
152func (p *Parser) Parse(b []byte) {
153 for i := range b {
154 p.Advance(b[i])
155 }
156}
157
158// Advance advances the parser using the given byte. It returns the action
159// performed by the parser.
160func (p *Parser) Advance(b byte) parser.Action {
161 switch p.state {
162 case parser.Utf8State:
163 // We handle UTF-8 here.
164 return p.advanceUtf8(b)
165 default:
166 return p.advance(b)
167 }
168}
169
170func (p *Parser) collectRune(b byte) {
171 if p.paramsLen >= utf8.UTFMax {
172 return
173 }
174
175 shift := p.paramsLen * 8
176 p.cmd &^= 0xff << shift
177 p.cmd |= int(b) << shift
178 p.paramsLen++
179}
180
181func (p *Parser) advanceUtf8(b byte) parser.Action {
182 // Collect UTF-8 rune bytes.
183 p.collectRune(b)
184 rw := utf8ByteLen(byte(p.cmd & 0xff))
185 if rw == -1 {
186 // We panic here because the first byte comes from the state machine,
187 // if this panics, it means there is a bug in the state machine!
188 panic("invalid rune") // unreachable
189 }
190
191 if p.paramsLen < rw {
192 return parser.CollectAction
193 }
194
195 // We have enough bytes to decode the rune using unsafe
196 if p.handler.Print != nil {
197 p.handler.Print(p.Rune())
198 }
199
200 p.state = parser.GroundState
201 p.paramsLen = 0
202
203 return parser.PrintAction
204}
205
206func (p *Parser) advance(b byte) parser.Action {
207 state, action := parser.Table.Transition(p.state, b)
208
209 // We need to clear the parser state if the state changes from EscapeState.
210 // This is because when we enter the EscapeState, we don't get a chance to
211 // clear the parser state. For example, when a sequence terminates with a
212 // ST (\x1b\\ or \x9c), we dispatch the current sequence and transition to
213 // EscapeState. However, the parser state is not cleared in this case and
214 // we need to clear it here before dispatching the esc sequence.
215 if p.state != state {
216 if p.state == parser.EscapeState {
217 p.performAction(parser.ClearAction, state, b)
218 }
219 if action == parser.PutAction &&
220 p.state == parser.DcsEntryState && state == parser.DcsStringState {
221 // XXX: This is a special case where we need to start collecting
222 // non-string parameterized data i.e. doesn't follow the ECMA-48 §
223 // 5.4.1 string parameters format.
224 p.performAction(parser.StartAction, state, 0)
225 }
226 }
227
228 // Handle special cases
229 switch {
230 case b == ESC && p.state == parser.EscapeState:
231 // Two ESCs in a row
232 p.performAction(parser.ExecuteAction, state, b)
233 default:
234 p.performAction(action, state, b)
235 }
236
237 p.state = state
238
239 return action
240}
241
242func (p *Parser) parseStringCmd() {
243 // Try to parse the command
244 datalen := len(p.data)
245 if p.dataLen >= 0 {
246 datalen = p.dataLen
247 }
248 for i := range datalen {
249 d := p.data[i]
250 if d < '0' || d > '9' {
251 break
252 }
253 if p.cmd == parser.MissingCommand {
254 p.cmd = 0
255 }
256 p.cmd *= 10
257 p.cmd += int(d - '0')
258 }
259}
260
// performAction applies a single state machine action to the parser.
//
// action is the action to perform, state is the state the parser is
// transitioning to, and b is the byte that triggered the action. Note that
// p.state still holds the state from before the transition while this method
// runs, because the caller stores the new state only afterwards.
func (p *Parser) performAction(action parser.Action, state parser.State, b byte) {
	switch action {
	case parser.IgnoreAction:
		// Nothing to do.
		break

	case parser.ClearAction:
		p.clear()

	case parser.PrintAction:
		// Remember the byte and report it as a printable character.
		p.cmd = int(b)
		if p.handler.Print != nil {
			p.handler.Print(rune(b))
		}

	case parser.ExecuteAction:
		// Remember the byte and report it as a control code.
		p.cmd = int(b)
		if p.handler.Execute != nil {
			p.handler.Execute(b)
		}

	case parser.PrefixAction:
		// Collect private prefix
		// we only store the last prefix
		p.cmd &^= 0xff << parser.PrefixShift
		p.cmd |= int(b) << parser.PrefixShift

	case parser.CollectAction:
		if state == parser.Utf8State {
			// Reset the UTF-8 counter
			// and store b as the first byte of the rune.
			p.paramsLen = 0
			p.collectRune(b)
		} else {
			// Collect intermediate bytes
			// we only store the last intermediate byte
			p.cmd &^= 0xff << parser.IntermedShift
			p.cmd |= int(b) << parser.IntermedShift
		}

	case parser.ParamAction:
		// Collect parameters
		// Once the params buffer is full, further parameter bytes are
		// dropped.
		if p.paramsLen >= len(p.params) {
			break
		}

		if b >= '0' && b <= '9' {
			// The first digit turns a missing parameter into a present one.
			if p.params[p.paramsLen] == parser.MissingParam {
				p.params[p.paramsLen] = 0
			}

			// Accumulate the decimal value digit by digit.
			p.params[p.paramsLen] *= 10
			p.params[p.paramsLen] += int(b - '0')
		}

		if b == ':' {
			// Flag the current parameter with parser.HasMoreFlag.
			p.params[p.paramsLen] |= parser.HasMoreFlag
		}

		if b == ';' || b == ':' {
			// Move on to the next slot and mark it as missing until a digit
			// arrives for it.
			p.paramsLen++
			if p.paramsLen < len(p.params) {
				p.params[p.paramsLen] = parser.MissingParam
			}
		}

	case parser.StartAction:
		// Reset the data buffer: in unlimited mode (negative dataLen) the
		// slice is truncated, otherwise the length counter is zeroed.
		if p.dataLen < 0 && p.data != nil {
			p.data = p.data[:0]
		} else {
			p.dataLen = 0
		}
		if p.state >= parser.DcsEntryState && p.state <= parser.DcsStringState {
			// Collect the command byte for DCS
			p.cmd |= int(b)
		} else {
			// Other string sequences start without a command.
			p.cmd = parser.MissingCommand
		}

	case parser.PutAction:
		switch p.state {
		case parser.OscStringState:
			// On a ';' while the command is still missing, parse the command
			// from the data collected so far.
			if b == ';' && p.cmd == parser.MissingCommand {
				p.parseStringCmd()
			}
		}

		if p.dataLen < 0 {
			// Unlimited mode: grow the buffer as needed.
			p.data = append(p.data, b)
		} else {
			// Bounded mode: bytes that do not fit are silently dropped.
			if p.dataLen < len(p.data) {
				p.data[p.dataLen] = b
				p.dataLen++
			}
		}

	case parser.DispatchAction:
		// Increment the last parameter
		// so that it is counted. paramsLen only grows on separators, so the
		// parameter currently being collected is not included yet. With no
		// separators seen, it counts only if the first slot holds a value.
		if p.paramsLen > 0 && p.paramsLen < len(p.params)-1 ||
			p.paramsLen == 0 && len(p.params) > 0 && p.params[0] != parser.MissingParam {
			p.paramsLen++
		}

		if p.state == parser.OscStringState && p.cmd == parser.MissingCommand {
			// Ensure we have a command for OSC
			p.parseStringCmd()
		}

		// Take the collected portion of the data buffer; in unlimited mode
		// the whole slice is the collected data.
		data := p.data
		if p.dataLen >= 0 {
			data = data[:p.dataLen]
		}
		// Dispatch to the handler that matches the state the sequence was
		// collected in. Handlers that are nil are skipped.
		switch p.state {
		case parser.CsiEntryState, parser.CsiParamState, parser.CsiIntermediateState:
			// b is the final byte; merge it into the packed command.
			p.cmd |= int(b)
			if p.handler.HandleCsi != nil {
				p.handler.HandleCsi(Cmd(p.cmd), p.Params())
			}
		case parser.EscapeState, parser.EscapeIntermediateState:
			// b is the final byte; merge it into the packed command.
			p.cmd |= int(b)
			if p.handler.HandleEsc != nil {
				p.handler.HandleEsc(Cmd(p.cmd))
			}
		case parser.DcsEntryState, parser.DcsParamState, parser.DcsIntermediateState, parser.DcsStringState:
			// The DCS command byte was already merged in by StartAction.
			if p.handler.HandleDcs != nil {
				p.handler.HandleDcs(Cmd(p.cmd), p.Params(), data)
			}
		case parser.OscStringState:
			if p.handler.HandleOsc != nil {
				p.handler.HandleOsc(p.cmd, data)
			}
		case parser.SosStringState:
			if p.handler.HandleSos != nil {
				p.handler.HandleSos(data)
			}
		case parser.PmStringState:
			if p.handler.HandlePm != nil {
				p.handler.HandlePm(data)
			}
		case parser.ApcStringState:
			if p.handler.HandleApc != nil {
				p.handler.HandleApc(data)
			}
		}
	}
}
405
// utf8ByteLen maps the first byte of an encoded character to the total number
// of bytes its bit pattern announces: 1 for 0x00-0x7F, 2 for 0xC0-0xDF, 3 for
// 0xE0-0xEF and 4 for 0xF0-0xF7. Any other byte yields -1.
func utf8ByteLen(b byte) int {
	switch {
	case b < 0x80: // 0xxx_xxxx
		return 1
	case b >= 0xC0 && b < 0xE0: // 110x_xxxx
		return 2
	case b >= 0xE0 && b < 0xF0: // 1110_xxxx
		return 3
	case b >= 0xF0 && b < 0xF8: // 1111_0xxx
		return 4
	default:
		return -1
	}
}