1// Copyright (C) 2016 Kohei YOSHIDA. All rights reserved.
2//
3// This program is free software; you can redistribute it and/or
4// modify it under the terms of The BSD 3-Clause License
5// that can be found in the LICENSE file.
6
7package uritemplate
8
9import (
10 "fmt"
11 "unicode"
12 "unicode/utf8"
13)
14
// parseOp identifies the operator of a template expression. The parser picks
// the value from the first character after '{'.
type parseOp int

const (
	// parseOpSimple is used when the expression has no operator character.
	parseOpSimple = parseOp(iota)
	// parseOpPlus is selected by '+'.
	parseOpPlus
	// parseOpCrosshatch is selected by '#'.
	parseOpCrosshatch
	// parseOpDot is selected by '.'.
	parseOpDot
	// parseOpSlash is selected by '/'.
	parseOpSlash
	// parseOpSemicolon is selected by ';'.
	parseOpSemicolon
	// parseOpQuestion is selected by '?'.
	parseOpQuestion
	// parseOpAmpersand is selected by '&'.
	parseOpAmpersand
)
27
var (
	// rangeVarchar lists the characters the parser accepts as ordinary
	// variable-name characters: ASCII digits, ASCII letters and '_'.
	// ('.' and '%' inside names are handled separately by the parser.)
	rangeVarchar = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x0030, Hi: 0x0039, Stride: 1}, // '0' - '9'
			{Lo: 0x0041, Hi: 0x005A, Stride: 1}, // 'A' - 'Z'
			{Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
			{Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
		},
		// Number of R16 entries with Hi <= unicode.MaxLatin1: all four.
		LatinOffset: 4,
	}

	// rangeLiterals lists the characters the parser accepts verbatim outside
	// of expressions. '{' and '%' are not part of the table; the parser
	// treats them specially.
	rangeLiterals = &unicode.RangeTable{
		R16: []unicode.Range16{
			{Lo: 0x0021, Hi: 0x0021, Stride: 1}, // '!'
			{Lo: 0x0023, Hi: 0x0024, Stride: 1}, // '#' - '$'
			// '&' - ';'. This includes the apostrophe (0x27), which used to
			// be excluded; an erratum allowing it is under review:
			// https://www.rfc-editor.org/errata/eid6937
			{Lo: 0x0026, Hi: 0x003B, Stride: 1},
			{Lo: 0x003D, Hi: 0x003D, Stride: 1}, // '='
			{Lo: 0x003F, Hi: 0x005B, Stride: 1}, // '?' - '['
			{Lo: 0x005D, Hi: 0x005D, Stride: 1}, // ']'
			{Lo: 0x005F, Hi: 0x005F, Stride: 1}, // '_'
			{Lo: 0x0061, Hi: 0x007A, Stride: 1}, // 'a' - 'z'
			{Lo: 0x007E, Hi: 0x007E, Stride: 1}, // '~'
			{Lo: 0x00A0, Hi: 0xD7FF, Stride: 1}, // ucschar
			{Lo: 0xE000, Hi: 0xF8FF, Stride: 1}, // iprivate
			{Lo: 0xF900, Hi: 0xFDCF, Stride: 1}, // ucschar
			{Lo: 0xFDF0, Hi: 0xFFEF, Stride: 1}, // ucschar
		},
		R32: []unicode.Range32{
			{Lo: 0x00010000, Hi: 0x0001FFFD, Stride: 1}, // ucschar
			{Lo: 0x00020000, Hi: 0x0002FFFD, Stride: 1}, // ucschar
			{Lo: 0x00030000, Hi: 0x0003FFFD, Stride: 1}, // ucschar
			{Lo: 0x00040000, Hi: 0x0004FFFD, Stride: 1}, // ucschar
			{Lo: 0x00050000, Hi: 0x0005FFFD, Stride: 1}, // ucschar
			{Lo: 0x00060000, Hi: 0x0006FFFD, Stride: 1}, // ucschar
			{Lo: 0x00070000, Hi: 0x0007FFFD, Stride: 1}, // ucschar
			{Lo: 0x00080000, Hi: 0x0008FFFD, Stride: 1}, // ucschar
			{Lo: 0x00090000, Hi: 0x0009FFFD, Stride: 1}, // ucschar
			{Lo: 0x000A0000, Hi: 0x000AFFFD, Stride: 1}, // ucschar
			{Lo: 0x000B0000, Hi: 0x000BFFFD, Stride: 1}, // ucschar
			{Lo: 0x000C0000, Hi: 0x000CFFFD, Stride: 1}, // ucschar
			{Lo: 0x000D0000, Hi: 0x000DFFFD, Stride: 1}, // ucschar
			{Lo: 0x000E1000, Hi: 0x000EFFFD, Stride: 1}, // ucschar
			{Lo: 0x000F0000, Hi: 0x000FFFFD, Stride: 1}, // iprivate
			{Lo: 0x00100000, Hi: 0x0010FFFD, Stride: 1}, // iprivate
		},
		// Number of R16 entries with Hi <= unicode.MaxLatin1. Only the nine
		// entries up to '~' qualify; the 0x00A0-0xD7FF range ends far above
		// Latin-1 and must not be counted.
		LatinOffset: 9,
	}
)
75
76type parser struct {
77 r string
78 start int
79 stop int
80 state parseState
81}
82
83func (p *parser) errorf(i rune, format string, a ...interface{}) error {
84 return fmt.Errorf("%s: %s%s", fmt.Sprintf(format, a...), p.r[0:p.stop], string(i))
85}
86
87func (p *parser) rune() (rune, int) {
88 r, size := utf8.DecodeRuneInString(p.r[p.stop:])
89 if r != utf8.RuneError {
90 p.stop += size
91 }
92 return r, size
93}
94
95func (p *parser) unread(r rune) {
96 p.stop -= utf8.RuneLen(r)
97}
98
// parseState names the states of the template parser's state machine.
type parseState int

const (
	// parseStateDefault: scanning literal text outside of any expression.
	parseStateDefault parseState = iota
	// parseStateOperator: just after '{', looking at a possible operator.
	parseStateOperator
	// parseStateVarList: after a variable spec, expecting ',' or '}'.
	parseStateVarList
	// parseStateVarName: reading the characters of a variable name.
	parseStateVarName
	// parseStatePrefix: after ':', reading the digits of a max-length.
	parseStatePrefix
)
108
109func (p *parser) setState(state parseState) {
110 p.state = state
111 p.start = p.stop
112}
113
// parseURITemplate scans p.r rune by rune and builds a Template from it.
//
// The returned Template keeps the raw input in raw and, in exprs, the literal
// segments and expressions in the order they appear. Scanning is driven by
// p.state; p.start marks where the current token began and p.stop is the
// read cursor. The first violation aborts parsing: the result is then nil
// together with an error produced by p.errorf (or by p.consumeTriplet).
//
// It panics if p.state holds a value that none of the cases below handle.
func (p *parser) parseURITemplate() (*Template, error) {
	tmpl := Template{
		raw:   p.r,
		exprs: []template{},
	}

	// exp is the expression currently being filled in; it is assigned when a
	// '{' is seen in the default state.
	var exp *expression
	for {
		r, size := p.rune()
		if r == utf8.RuneError {
			// size == 0 means nothing is left to decode: end of input.
			if size == 0 {
				// Ending anywhere but in literal text means a '{' was
				// never closed.
				if p.state != parseStateDefault {
					return nil, p.errorf('_', "incomplete expression")
				}
				// Flush the trailing literal segment, if any.
				if p.start < p.stop {
					tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:p.stop]))
				}
				return &tmpl, nil
			}
			// Any other RuneError result is reported as malformed input.
			return nil, p.errorf('_', "invalid UTF-8 sequence")
		}

		switch p.state {
		case parseStateDefault:
			// Literal text outside of an expression.
			switch r {
			case '{':
				// Emit the literal collected before the brace (the brace
				// itself, size bytes wide, is excluded), then open a new
				// expression.
				if stop := p.stop - size; stop > p.start {
					tmpl.exprs = append(tmpl.exprs, literals(p.r[p.start:stop]))
				}
				exp = &expression{}
				tmpl.exprs = append(tmpl.exprs, exp)
				p.setState(parseStateOperator)
			case '%':
				// Step back onto the '%' so consumeTriplet can check the
				// triplet as a whole.
				p.unread(r)
				if err := p.consumeTriplet(); err != nil {
					return nil, err
				}
			default:
				if !unicode.Is(rangeLiterals, r) {
					p.unread(r)
					return nil, p.errorf('_', "unacceptable character (hint: use %%XX encoding)")
				}
			}
		case parseStateOperator:
			// First rune after '{': an operator, or already part of the
			// first variable name.
			switch r {
			default:
				// Not an operator character: push it back so it is read
				// again as the start of a variable name.
				p.unread(r)
				exp.op = parseOpSimple
			case '+':
				exp.op = parseOpPlus
			case '#':
				exp.op = parseOpCrosshatch
			case '.':
				exp.op = parseOpDot
			case '/':
				exp.op = parseOpSlash
			case ';':
				exp.op = parseOpSemicolon
			case '?':
				exp.op = parseOpQuestion
			case '&':
				exp.op = parseOpAmpersand
			case '=', ',', '!', '@', '|': // op-reserved
				return nil, p.errorf('|', "unimplemented operator (op-reserved)")
			}
			p.setState(parseStateVarName)
		case parseStateVarList:
			// A variable spec has just been completed.
			switch r {
			case ',':
				p.setState(parseStateVarName)
			case '}':
				// NOTE(review): exp.init is defined outside this file;
				// presumably it finalizes the expression - confirm.
				exp.init()
				p.setState(parseStateDefault)
			default:
				p.unread(r)
				return nil, p.errorf('_', "unrecognized value modifier")
			}
		case parseStateVarName:
			switch r {
			case ':', '*':
				// A modifier ends the name; the name is everything since
				// p.start without the modifier character itself.
				name := p.r[p.start : p.stop-size]
				if !isValidVarname(name) {
					return nil, p.errorf('|', "unacceptable variable name")
				}
				explode := r == '*'
				exp.vars = append(exp.vars, varspec{
					name:    name,
					explode: explode,
				})
				if explode {
					p.setState(parseStateVarList)
				} else {
					// ':' must be followed by a max-length.
					p.setState(parseStatePrefix)
				}
			case ',', '}':
				// Push the delimiter back so the var-list state handles
				// it on the next iteration.
				p.unread(r)
				name := p.r[p.start:p.stop]
				if !isValidVarname(name) {
					return nil, p.errorf('|', "unacceptable variable name")
				}
				exp.vars = append(exp.vars, varspec{
					name: name,
				})
				p.setState(parseStateVarList)
			case '%':
				// Step back onto the '%' so consumeTriplet can check the
				// triplet as a whole.
				p.unread(r)
				if err := p.consumeTriplet(); err != nil {
					return nil, err
				}
			case '.':
				// A dot may neither start the name nor directly follow
				// another dot.
				if dot := p.stop - size; dot == p.start || p.r[dot-1] == '.' {
					return nil, p.errorf('|', "unacceptable variable name")
				}
			default:
				if !unicode.Is(rangeVarchar, r) {
					p.unread(r)
					return nil, p.errorf('_', "unacceptable variable name")
				}
			}
		case parseStatePrefix:
			// Digits after ':' accumulate into maxlen of the variable spec
			// that was appended last.
			spec := &(exp.vars[len(exp.vars)-1])
			switch {
			case '0' <= r && r <= '9':
				spec.maxlen *= 10
				spec.maxlen += int(r - '0')
				// A value of 0 here means the first digit was '0'; more
				// than four digits push the value above 9999.
				if spec.maxlen == 0 || spec.maxlen > 9999 {
					return nil, p.errorf('|', "max-length must be (0, 9999]")
				}
			default:
				// First non-digit: push it back for the var-list state.
				// maxlen still being 0 means no digit followed the ':'.
				p.unread(r)
				if spec.maxlen == 0 {
					return nil, p.errorf('_', "max-length must be (0, 9999]")
				}
				p.setState(parseStateVarList)
			}
		default:
			// Reached only if p.state holds a value without a case above.
			p.unread(r)
			panic(p.errorf('_', "unhandled parseState(%d)", p.state))
		}
	}
}
255
// isValidVarname reports whether name has an acceptable dot structure: it
// must be non-empty, must neither begin nor end with '.', and must not
// contain two consecutive dots. Other characters are not inspected here.
func isValidVarname(name string) bool {
	n := len(name)
	if n == 0 {
		return false
	}
	if name[0] == '.' || name[n-1] == '.' {
		return false
	}

	// With the edges known to be free of dots, reject any ".." pair.
	prevDot := false
	for idx := 0; idx < n; idx++ {
		isDot := name[idx] == '.'
		if isDot && prevDot {
			return false
		}
		prevDot = isDot
	}
	return true
}
270
271func (p *parser) consumeTriplet() error {
272 if len(p.r)-p.stop < 3 || p.r[p.stop] != '%' || !ishex(p.r[p.stop+1]) || !ishex(p.r[p.stop+2]) {
273 return p.errorf('_', "incomplete pct-encodeed")
274 }
275 p.stop += 3
276 return nil
277}