parse.go

  1// Copyright (C) 2016 Kohei YOSHIDA. All rights reserved.
  2//
  3// This program is free software; you can redistribute it and/or
  4// modify it under the terms of The BSD 3-Clause License
  5// that can be found in the LICENSE file.
  6
  7package uritemplate
  8
  9import (
 10	"fmt"
 11	"unicode"
 12	"unicode/utf8"
 13)
 14
// parseOp identifies the operator of a template expression, i.e. the optional
// character that may directly follow the opening '{'.
type parseOp int

const (
	// parseOpSimple is used when the character after '{' is not an operator.
	parseOpSimple parseOp = iota
	// parseOpPlus is selected by '+'.
	parseOpPlus
	// parseOpCrosshatch is selected by '#'.
	parseOpCrosshatch
	// parseOpDot is selected by '.'.
	parseOpDot
	// parseOpSlash is selected by '/'.
	parseOpSlash
	// parseOpSemicolon is selected by ';'.
	parseOpSemicolon
	// parseOpQuestion is selected by '?'.
	parseOpQuestion
	// parseOpAmpersand is selected by '&'.
	parseOpAmpersand
)
 27
 28var (
 29	rangeVarchar = &unicode.RangeTable{
 30		R16: []unicode.Range16{
 31			{Lo: 0x0030, Hi: 0x0039, Stride: 1}, // '0' - '9'
 32			{Lo: 0x0041, Hi: 0x005A, Stride: 1}, // 'A' - 'Z'
 33			{Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
 34			{Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
 35		},
 36		LatinOffset: 4,
 37	}
 38	rangeLiterals = &unicode.RangeTable{
 39		R16: []unicode.Range16{
 40			{Lo: 0x0021, Hi: 0x0021, Stride: 1}, // '!'
 41			{Lo: 0x0023, Hi: 0x0024, Stride: 1}, // '#' - '$'
 42			{Lo: 0x0026, Hi: 0x003B, Stride: 1}, // '&' ''' '(' - ';'. '''/27 used to be excluded but an errata is in the review process https://www.rfc-editor.org/errata/eid6937
 43			{Lo: 0x003D, Hi: 0x003D, Stride: 1}, // '='
 44			{Lo: 0x003F, Hi: 0x005B, Stride: 1}, // '?' - '['
 45			{Lo: 0x005D, Hi: 0x005D, Stride: 1}, // ']'
 46			{Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
 47			{Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
 48			{Lo: 0x007E, Hi: 0x007E, Stride: 1}, // '~'
 49			{Lo: 0x00A0, Hi: 0xD7FF, Stride: 1}, // ucschar
 50			{Lo: 0xE000, Hi: 0xF8FF, Stride: 1}, // iprivate
 51			{Lo: 0xF900, Hi: 0xFDCF, Stride: 1}, // ucschar
 52			{Lo: 0xFDF0, Hi: 0xFFEF, Stride: 1}, // ucschar
 53		},
 54		R32: []unicode.Range32{
 55			{Lo: 0x00010000, Hi: 0x0001FFFD, Stride: 1}, // ucschar
 56			{Lo: 0x00020000, Hi: 0x0002FFFD, Stride: 1}, // ucschar
 57			{Lo: 0x00030000, Hi: 0x0003FFFD, Stride: 1}, // ucschar
 58			{Lo: 0x00040000, Hi: 0x0004FFFD, Stride: 1}, // ucschar
 59			{Lo: 0x00050000, Hi: 0x0005FFFD, Stride: 1}, // ucschar
 60			{Lo: 0x00060000, Hi: 0x0006FFFD, Stride: 1}, // ucschar
 61			{Lo: 0x00070000, Hi: 0x0007FFFD, Stride: 1}, // ucschar
 62			{Lo: 0x00080000, Hi: 0x0008FFFD, Stride: 1}, // ucschar
 63			{Lo: 0x00090000, Hi: 0x0009FFFD, Stride: 1}, // ucschar
 64			{Lo: 0x000A0000, Hi: 0x000AFFFD, Stride: 1}, // ucschar
 65			{Lo: 0x000B0000, Hi: 0x000BFFFD, Stride: 1}, // ucschar
 66			{Lo: 0x000C0000, Hi: 0x000CFFFD, Stride: 1}, // ucschar
 67			{Lo: 0x000D0000, Hi: 0x000DFFFD, Stride: 1}, // ucschar
 68			{Lo: 0x000E1000, Hi: 0x000EFFFD, Stride: 1}, // ucschar
 69			{Lo: 0x000F0000, Hi: 0x000FFFFD, Stride: 1}, // iprivate
 70			{Lo: 0x00100000, Hi: 0x0010FFFD, Stride: 1}, // iprivate
 71		},
 72		LatinOffset: 10,
 73	}
 74)
 75
// parser holds the scanning state used while parsing a URI template string.
type parser struct {
	// r is the template text being parsed.
	r string
	// start is the byte offset in r where the current token begins; setState
	// resets it to stop.
	start int
	// stop is the byte offset in r of the next byte to be decoded.
	stop int
	// state is the current state of the parsing state machine.
	state parseState
}
 82
 83func (p *parser) errorf(i rune, format string, a ...interface{}) error {
 84	return fmt.Errorf("%s: %s%s", fmt.Sprintf(format, a...), p.r[0:p.stop], string(i))
 85}
 86
 87func (p *parser) rune() (rune, int) {
 88	r, size := utf8.DecodeRuneInString(p.r[p.stop:])
 89	if r != utf8.RuneError {
 90		p.stop += size
 91	}
 92	return r, size
 93}
 94
 95func (p *parser) unread(r rune) {
 96	p.stop -= utf8.RuneLen(r)
 97}
 98
 99type parseState int
100
101const (
102	parseStateDefault = parseState(iota)
103	parseStateOperator
104	parseStateVarList
105	parseStateVarName
106	parseStatePrefix
107)
108
109func (p *parser) setState(state parseState) {
110	p.state = state
111	p.start = p.stop
112}
113
114func (p *parser) parseURITemplate() (*Template, error) {
115	tmpl := Template{
116		raw:   p.r,
117		exprs: []template{},
118	}
119
120	var exp *expression
121	for {
122		r, size := p.rune()
123		if r == utf8.RuneError {
124			if size == 0 {
125				if p.state != parseStateDefault {
126					return nil, p.errorf('_', "incomplete expression")
127				}
128				if p.start < p.stop {
129					tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:p.stop]))
130				}
131				return &tmpl, nil
132			}
133			return nil, p.errorf('_', "invalid UTF-8 sequence")
134		}
135
136		switch p.state {
137		case parseStateDefault:
138			switch r {
139			case '{':
140				if stop := p.stop - size; stop > p.start {
141					tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:stop]))
142				}
143				exp = &expression{}
144				tmpl.exprs = append(tmpl.exprs, exp)
145				p.setState(parseStateOperator)
146			case '%':
147				p.unread(r)
148				if err := p.consumeTriplet(); err != nil {
149					return nil, err
150				}
151			default:
152				if !unicode.Is(rangeLiterals, r) {
153					p.unread(r)
154					return nil, p.errorf('_', "unacceptable character (hint: use %%XX encoding)")
155				}
156			}
157		case parseStateOperator:
158			switch r {
159			default:
160				p.unread(r)
161				exp.op = parseOpSimple
162			case '+':
163				exp.op = parseOpPlus
164			case '#':
165				exp.op = parseOpCrosshatch
166			case '.':
167				exp.op = parseOpDot
168			case '/':
169				exp.op = parseOpSlash
170			case ';':
171				exp.op = parseOpSemicolon
172			case '?':
173				exp.op = parseOpQuestion
174			case '&':
175				exp.op = parseOpAmpersand
176			case '=', ',', '!', '@', '|': // op-reserved
177				return nil, p.errorf('|', "unimplemented operator (op-reserved)")
178			}
179			p.setState(parseStateVarName)
180		case parseStateVarList:
181			switch r {
182			case ',':
183				p.setState(parseStateVarName)
184			case '}':
185				exp.init()
186				p.setState(parseStateDefault)
187			default:
188				p.unread(r)
189				return nil, p.errorf('_', "unrecognized value modifier")
190			}
191		case parseStateVarName:
192			switch r {
193			case ':', '*':
194				name := p.r[p.start : p.stop-size]
195				if !isValidVarname(name) {
196					return nil, p.errorf('|', "unacceptable variable name")
197				}
198				explode := r == '*'
199				exp.vars = append(exp.vars, varspec{
200					name:    name,
201					explode: explode,
202				})
203				if explode {
204					p.setState(parseStateVarList)
205				} else {
206					p.setState(parseStatePrefix)
207				}
208			case ',', '}':
209				p.unread(r)
210				name := p.r[p.start:p.stop]
211				if !isValidVarname(name) {
212					return nil, p.errorf('|', "unacceptable variable name")
213				}
214				exp.vars = append(exp.vars, varspec{
215					name: name,
216				})
217				p.setState(parseStateVarList)
218			case '%':
219				p.unread(r)
220				if err := p.consumeTriplet(); err != nil {
221					return nil, err
222				}
223			case '.':
224				if dot := p.stop - size; dot == p.start || p.r[dot-1] == '.' {
225					return nil, p.errorf('|', "unacceptable variable name")
226				}
227			default:
228				if !unicode.Is(rangeVarchar, r) {
229					p.unread(r)
230					return nil, p.errorf('_', "unacceptable variable name")
231				}
232			}
233		case parseStatePrefix:
234			spec := &(exp.vars[len(exp.vars)-1])
235			switch {
236			case '0' <= r && r <= '9':
237				spec.maxlen *= 10
238				spec.maxlen += int(r - '0')
239				if spec.maxlen == 0 || spec.maxlen > 9999 {
240					return nil, p.errorf('|', "max-length must be (0, 9999]")
241				}
242			default:
243				p.unread(r)
244				if spec.maxlen == 0 {
245					return nil, p.errorf('_', "max-length must be (0, 9999]")
246				}
247				p.setState(parseStateVarList)
248			}
249		default:
250			p.unread(r)
251			panic(p.errorf('_', "unhandled parseState(%d)", p.state))
252		}
253	}
254}
255
256func isValidVarname(name string) bool {
257	if l := len(name); l == 0 || name[0] == '.' || name[l-1] == '.' {
258		return false
259	}
260	for i := 1; i < len(name)-1; i++ {
261		switch c := name[i]; c {
262		case '.':
263			if name[i-1] == '.' {
264				return false
265			}
266		}
267	}
268	return true
269}
270
271func (p *parser) consumeTriplet() error {
272	if len(p.r)-p.stop < 3 || p.r[p.stop] != '%' || !ishex(p.r[p.stop+1]) || !ishex(p.r[p.stop+2]) {
273		return p.errorf('_', "incomplete pct-encodeed")
274	}
275	p.stop += 3
276	return nil
277}