1package interpolate
2
3import (
4 "fmt"
5 "strconv"
6 "strings"
7 "unicode"
8 "unicode/utf8"
9)
10
11// This is a recursive descent parser for our grammar. Because it can contain nested expressions
12// like ${LLAMAS:-${ROCK:-true}} we can't use regular expressions. The simplest possible alternative
13// is a recursive parser like this. It parses a chunk and then calls a function to parse that
14// further and so on and so forth. It results in a tree of objects that represent the things we've
15// parsed (an AST). This means that the logic for how expansions work lives in those objects, and
16// the logic for how we go from plain text to parsed objects lives here.
17//
// To keep things simple, we do our "lexing" or "scanning" with a few functions at the end of the
// file rather than with a dedicated lexer that emits tokens. This matches the simplicity of the
// format we are parsing relatively well.
//
// Below is an EBNF grammar for the language. The parser was built by turning each production into
// functions and structs named after it, reading the string rune by rune (peekRune and nextRune).
24
/*
EscapedBackslash = "\\"
EscapedDollar    = ( "\$" | "$$" )
Identifier       = letter { letter | digit | "_" }
Expansion        = "$" ( Identifier | Brace )
Brace            = "{" Identifier [ Operation ] "}"
Text             = { EscapedBackslash | EscapedDollar | all characters except "$" }
Expression       = { Text | Expansion }
EmptyValue       = ":-" { Expression }
UnsetValue       = "-" { Expression }
Substring        = ":" number [ ":" number ]
Required         = "?" { Expression }
Operation        = EmptyValue | UnsetValue | Substring | Required
*/
33
// eof is the sentinel returned by peekRune and nextRune once the whole input has been consumed.
// It is negative so that it can never collide with a decoded rune.
const eof = -1
37
// Parser takes a string and parses out a tree of structs that represent text and Expansions.
// It holds the input together with a cursor that the parse and scan methods advance as they
// consume runes.
type Parser struct {
	input string // the string we are scanning
	pos   int    // the current position, as a byte offset into input
}
43
44// NewParser returns a new instance of a Parser
45func NewParser(str string) *Parser {
46 return &Parser{
47 input: str,
48 pos: 0,
49 }
50}
51
// Parse parses the input from the parser's current position to its end and returns the result as
// an Expression made up of text items and (possibly nested) expansions. Parsing stops at the first
// error, which is returned.
func (p *Parser) Parse() (Expression, error) {
	return p.parseExpression()
}
56
57func (p *Parser) parseExpression(stop ...rune) (Expression, error) {
58 var expr Expression
59 var stopStr = string(stop)
60
61 for {
62 c := p.peekRune()
63 if c == eof || strings.ContainsRune(stopStr, c) {
64 break
65 }
66
67 // check for our escaped characters first, as we assume nothing subsequently is escaped
68 if strings.HasPrefix(p.input[p.pos:], `\\`) {
69 p.pos += 2
70 expr = append(expr, ExpressionItem{Text: `\\`})
71 continue
72 } else if strings.HasPrefix(p.input[p.pos:], `\$`) || strings.HasPrefix(p.input[p.pos:], `$$`) {
73 p.pos += 2
74 expr = append(expr, ExpressionItem{Text: `$`})
75 continue
76 }
77
78 // Ignore bash shell expansions
79 if strings.HasPrefix(p.input[p.pos:], `$(`) {
80 p.pos += 2
81 expr = append(expr, ExpressionItem{Text: `$(`})
82 continue
83 }
84
85 // If we run into a dollar sign and it's not the last char, it's an expansion
86 if c == '$' && p.pos < (len(p.input)-1) {
87 expansion, err := p.parseExpansion()
88 if err != nil {
89 return nil, err
90 }
91 expr = append(expr, ExpressionItem{Expansion: expansion})
92 continue
93 }
94
95 // nibble a character, otherwise if it's a \ or a $ we can loop
96 c = p.nextRune()
97
98 // Scan as much as we can into text
99 text := p.scanUntil(func(r rune) bool {
100 return (r == '$' || r == '\\' || strings.ContainsRune(stopStr, r))
101 })
102
103 expr = append(expr, ExpressionItem{Text: string(c) + text})
104 }
105
106 return expr, nil
107}
108
109func (p *Parser) parseExpansion() (Expansion, error) {
110 if c := p.nextRune(); c != '$' {
111 return nil, fmt.Errorf("Expected expansion to start with $, got %c", c)
112 }
113
114 // if we have an open brace, this is a brace expansion
115 if c := p.peekRune(); c == '{' {
116 return p.parseBraceExpansion()
117 }
118
119 identifier, err := p.scanIdentifier()
120 if err != nil {
121 return nil, err
122 }
123
124 return VariableExpansion{Identifier: identifier}, nil
125}
126
127func (p *Parser) parseBraceExpansion() (Expansion, error) {
128 if c := p.nextRune(); c != '{' {
129 return nil, fmt.Errorf("Expected brace expansion to start with {, got %c", c)
130 }
131
132 identifier, err := p.scanIdentifier()
133 if err != nil {
134 return nil, err
135 }
136
137 if c := p.peekRune(); c == '}' {
138 _ = p.nextRune()
139 return VariableExpansion{Identifier: identifier}, nil
140 }
141
142 var operator string
143 var exp Expansion
144
145 // Parse an operator, some trickery is needed to handle : vs :-
146 if op1 := p.nextRune(); op1 == ':' {
147 if op2 := p.peekRune(); op2 == '-' {
148 _ = p.nextRune()
149 operator = ":-"
150 } else {
151 operator = ":"
152 }
153 } else if op1 == '?' || op1 == '-' {
154 operator = string(op1)
155 } else {
156 return nil, fmt.Errorf("Expected an operator, got %c", op1)
157 }
158
159 switch operator {
160 case `:-`:
161 exp, err = p.parseEmptyValueExpansion(identifier)
162 if err != nil {
163 return nil, err
164 }
165 case `-`:
166 exp, err = p.parseUnsetValueExpansion(identifier)
167 if err != nil {
168 return nil, err
169 }
170 case `:`:
171 exp, err = p.parseSubstringExpansion(identifier)
172 if err != nil {
173 return nil, err
174 }
175 case `?`:
176 exp, err = p.parseRequiredExpansion(identifier)
177 if err != nil {
178 return nil, err
179 }
180 }
181
182 if c := p.nextRune(); c != '}' {
183 return nil, fmt.Errorf("Expected brace expansion to end with }, got %c", c)
184 }
185
186 return exp, nil
187}
188
189func (p *Parser) parseEmptyValueExpansion(identifier string) (Expansion, error) {
190 // parse an expression (text and expansions) up until the end of the brace
191 expr, err := p.parseExpression('}')
192 if err != nil {
193 return nil, err
194 }
195
196 return EmptyValueExpansion{Identifier: identifier, Content: expr}, nil
197}
198
199func (p *Parser) parseUnsetValueExpansion(identifier string) (Expansion, error) {
200 expr, err := p.parseExpression('}')
201 if err != nil {
202 return nil, err
203 }
204
205 return UnsetValueExpansion{Identifier: identifier, Content: expr}, nil
206}
207
208func (p *Parser) parseSubstringExpansion(identifier string) (Expansion, error) {
209 offset := p.scanUntil(func(r rune) bool {
210 return r == ':' || r == '}'
211 })
212
213 offsetInt, err := strconv.Atoi(strings.TrimSpace(offset))
214 if err != nil {
215 return nil, fmt.Errorf("Unable to parse offset: %v", err)
216 }
217
218 if c := p.peekRune(); c == '}' {
219 return SubstringExpansion{Identifier: identifier, Offset: offsetInt}, nil
220 }
221
222 _ = p.nextRune()
223 length := p.scanUntil(func(r rune) bool {
224 return r == '}'
225 })
226
227 lengthInt, err := strconv.Atoi(strings.TrimSpace(length))
228 if err != nil {
229 return nil, fmt.Errorf("Unable to parse length: %v", err)
230 }
231
232 return SubstringExpansion{Identifier: identifier, Offset: offsetInt, Length: lengthInt, HasLength: true}, nil
233}
234
235func (p *Parser) parseRequiredExpansion(identifier string) (Expansion, error) {
236 expr, err := p.parseExpression('}')
237 if err != nil {
238 return nil, err
239 }
240
241 return RequiredExpansion{Identifier: identifier, Message: expr}, nil
242}
243
244func (p *Parser) scanUntil(f func(rune) bool) string {
245 start := p.pos
246 for int(p.pos) < len(p.input) {
247 c, size := utf8.DecodeRuneInString(p.input[p.pos:])
248 if c == utf8.RuneError || f(c) {
249 break
250 }
251 p.pos += size
252 }
253 return p.input[start:p.pos]
254}
255
256func (p *Parser) scanIdentifier() (string, error) {
257 if c := p.peekRune(); !unicode.IsLetter(c) {
258 return "", fmt.Errorf("Expected identifier to start with a letter, got %c", c)
259 }
260 var notIdentifierChar = func(r rune) bool {
261 return (!unicode.IsLetter(r) && !unicode.IsNumber(r) && r != '_')
262 }
263 return p.scanUntil(notIdentifierChar), nil
264}
265
266func (p *Parser) nextRune() rune {
267 if int(p.pos) >= len(p.input) {
268 return eof
269 }
270 c, size := utf8.DecodeRuneInString(p.input[p.pos:])
271 p.pos += size
272 return c
273}
274
275func (p *Parser) peekRune() rune {
276 if int(p.pos) >= len(p.input) {
277 return eof
278 }
279 c, _ := utf8.DecodeRuneInString(p.input[p.pos:])
280 return c
281}