parser.go

  1package interpolate
  2
  3import (
  4	"fmt"
  5	"strconv"
  6	"strings"
  7	"unicode"
  8	"unicode/utf8"
  9)
 10
 11// This is a recursive descent parser for our grammar. Because it can contain nested expressions
 12// like ${LLAMAS:-${ROCK:-true}} we can't use regular expressions. The simplest possible alternative
 13// is a recursive parser like this. It parses a chunk and then calls a function to parse that
 14// further and so on and so forth. It results in a tree of objects that represent the things we've
 15// parsed (an AST). This means that the logic for how expansions work lives in those objects, and
 16// the logic for how we go from plain text to parsed objects lives here.
 17//
 18// To keep things simple, we do our "lexing" or "scanning" just as a few functions at the end of the
 19// file rather than as a dedicated lexer that emits tokens. This matches the simplicity of the
// format we are parsing relatively well.
 21//
// Below is an EBNF grammar for the language. The parser was built by turning this grammar into
// functions and structs with matching names, which read the string bite by bite (peekRune and
// nextRune).
 24
/*
EscapedBackslash = "\\"
EscapedDollar    = ( "\$" | "$$" )
Identifier       = letter { letter | digit | "_" }
Expansion        = "$" ( Identifier | Brace )
Brace            = "{" Identifier [ Operation ] "}"
Text             = { EscapedBackslash | EscapedDollar | all characters except "$" }
Expression       = { Text | Expansion }
EmptyValue       = ":-" { Expression }
UnsetValue       = "-" { Expression }
Substring        = ":" number [ ":" number ]
Required         = "?" { Expression }
Operation        = EmptyValue | UnsetValue | Substring | Required
*/
 33
 34const (
 35	eof = -1
 36)
 37
 38// Parser takes a string and parses out a tree of structs that represent text and Expansions
 39type Parser struct {
 40	input string // the string we are scanning
 41	pos   int    // the current position
 42}
 43
 44// NewParser returns a new instance of a Parser
 45func NewParser(str string) *Parser {
 46	return &Parser{
 47		input: str,
 48		pos:   0,
 49	}
 50}
 51
 52// Parse expansions out of the internal text and return them as a tree of Expressions
 53func (p *Parser) Parse() (Expression, error) {
 54	return p.parseExpression()
 55}
 56
 57func (p *Parser) parseExpression(stop ...rune) (Expression, error) {
 58	var expr Expression
 59	var stopStr = string(stop)
 60
 61	for {
 62		c := p.peekRune()
 63		if c == eof || strings.ContainsRune(stopStr, c) {
 64			break
 65		}
 66
 67		// check for our escaped characters first, as we assume nothing subsequently is escaped
 68		if strings.HasPrefix(p.input[p.pos:], `\\`) {
 69			p.pos += 2
 70			expr = append(expr, ExpressionItem{Text: `\\`})
 71			continue
 72		} else if strings.HasPrefix(p.input[p.pos:], `\$`) || strings.HasPrefix(p.input[p.pos:], `$$`) {
 73			p.pos += 2
 74			expr = append(expr, ExpressionItem{Text: `$`})
 75			continue
 76		}
 77
 78		// Ignore bash shell expansions
 79		if strings.HasPrefix(p.input[p.pos:], `$(`) {
 80			p.pos += 2
 81			expr = append(expr, ExpressionItem{Text: `$(`})
 82			continue
 83		}
 84
 85		// If we run into a dollar sign and it's not the last char, it's an expansion
 86		if c == '$' && p.pos < (len(p.input)-1) {
 87			expansion, err := p.parseExpansion()
 88			if err != nil {
 89				return nil, err
 90			}
 91			expr = append(expr, ExpressionItem{Expansion: expansion})
 92			continue
 93		}
 94
 95		// nibble a character, otherwise if it's a \ or a $ we can loop
 96		c = p.nextRune()
 97
 98		// Scan as much as we can into text
 99		text := p.scanUntil(func(r rune) bool {
100			return (r == '$' || r == '\\' || strings.ContainsRune(stopStr, r))
101		})
102
103		expr = append(expr, ExpressionItem{Text: string(c) + text})
104	}
105
106	return expr, nil
107}
108
109func (p *Parser) parseExpansion() (Expansion, error) {
110	if c := p.nextRune(); c != '$' {
111		return nil, fmt.Errorf("Expected expansion to start with $, got %c", c)
112	}
113
114	// if we have an open brace, this is a brace expansion
115	if c := p.peekRune(); c == '{' {
116		return p.parseBraceExpansion()
117	}
118
119	identifier, err := p.scanIdentifier()
120	if err != nil {
121		return nil, err
122	}
123
124	return VariableExpansion{Identifier: identifier}, nil
125}
126
127func (p *Parser) parseBraceExpansion() (Expansion, error) {
128	if c := p.nextRune(); c != '{' {
129		return nil, fmt.Errorf("Expected brace expansion to start with {, got %c", c)
130	}
131
132	identifier, err := p.scanIdentifier()
133	if err != nil {
134		return nil, err
135	}
136
137	if c := p.peekRune(); c == '}' {
138		_ = p.nextRune()
139		return VariableExpansion{Identifier: identifier}, nil
140	}
141
142	var operator string
143	var exp Expansion
144
145	// Parse an operator, some trickery is needed to handle : vs :-
146	if op1 := p.nextRune(); op1 == ':' {
147		if op2 := p.peekRune(); op2 == '-' {
148			_ = p.nextRune()
149			operator = ":-"
150		} else {
151			operator = ":"
152		}
153	} else if op1 == '?' || op1 == '-' {
154		operator = string(op1)
155	} else {
156		return nil, fmt.Errorf("Expected an operator, got %c", op1)
157	}
158
159	switch operator {
160	case `:-`:
161		exp, err = p.parseEmptyValueExpansion(identifier)
162		if err != nil {
163			return nil, err
164		}
165	case `-`:
166		exp, err = p.parseUnsetValueExpansion(identifier)
167		if err != nil {
168			return nil, err
169		}
170	case `:`:
171		exp, err = p.parseSubstringExpansion(identifier)
172		if err != nil {
173			return nil, err
174		}
175	case `?`:
176		exp, err = p.parseRequiredExpansion(identifier)
177		if err != nil {
178			return nil, err
179		}
180	}
181
182	if c := p.nextRune(); c != '}' {
183		return nil, fmt.Errorf("Expected brace expansion to end with }, got %c", c)
184	}
185
186	return exp, nil
187}
188
189func (p *Parser) parseEmptyValueExpansion(identifier string) (Expansion, error) {
190	// parse an expression (text and expansions) up until the end of the brace
191	expr, err := p.parseExpression('}')
192	if err != nil {
193		return nil, err
194	}
195
196	return EmptyValueExpansion{Identifier: identifier, Content: expr}, nil
197}
198
199func (p *Parser) parseUnsetValueExpansion(identifier string) (Expansion, error) {
200	expr, err := p.parseExpression('}')
201	if err != nil {
202		return nil, err
203	}
204
205	return UnsetValueExpansion{Identifier: identifier, Content: expr}, nil
206}
207
208func (p *Parser) parseSubstringExpansion(identifier string) (Expansion, error) {
209	offset := p.scanUntil(func(r rune) bool {
210		return r == ':' || r == '}'
211	})
212
213	offsetInt, err := strconv.Atoi(strings.TrimSpace(offset))
214	if err != nil {
215		return nil, fmt.Errorf("Unable to parse offset: %v", err)
216	}
217
218	if c := p.peekRune(); c == '}' {
219		return SubstringExpansion{Identifier: identifier, Offset: offsetInt}, nil
220	}
221
222	_ = p.nextRune()
223	length := p.scanUntil(func(r rune) bool {
224		return r == '}'
225	})
226
227	lengthInt, err := strconv.Atoi(strings.TrimSpace(length))
228	if err != nil {
229		return nil, fmt.Errorf("Unable to parse length: %v", err)
230	}
231
232	return SubstringExpansion{Identifier: identifier, Offset: offsetInt, Length: lengthInt, HasLength: true}, nil
233}
234
235func (p *Parser) parseRequiredExpansion(identifier string) (Expansion, error) {
236	expr, err := p.parseExpression('}')
237	if err != nil {
238		return nil, err
239	}
240
241	return RequiredExpansion{Identifier: identifier, Message: expr}, nil
242}
243
244func (p *Parser) scanUntil(f func(rune) bool) string {
245	start := p.pos
246	for int(p.pos) < len(p.input) {
247		c, size := utf8.DecodeRuneInString(p.input[p.pos:])
248		if c == utf8.RuneError || f(c) {
249			break
250		}
251		p.pos += size
252	}
253	return p.input[start:p.pos]
254}
255
256func (p *Parser) scanIdentifier() (string, error) {
257	if c := p.peekRune(); !unicode.IsLetter(c) {
258		return "", fmt.Errorf("Expected identifier to start with a letter, got %c", c)
259	}
260	var notIdentifierChar = func(r rune) bool {
261		return (!unicode.IsLetter(r) && !unicode.IsNumber(r) && r != '_')
262	}
263	return p.scanUntil(notIdentifierChar), nil
264}
265
266func (p *Parser) nextRune() rune {
267	if int(p.pos) >= len(p.input) {
268		return eof
269	}
270	c, size := utf8.DecodeRuneInString(p.input[p.pos:])
271	p.pos += size
272	return c
273}
274
275func (p *Parser) peekRune() rune {
276	if int(p.pos) >= len(p.input) {
277		return eof
278	}
279	c, _ := utf8.DecodeRuneInString(p.input[p.pos:])
280	return c
281}