regexp.go

  1// Copyright 2012 The Gorilla Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package mux
  6
  7import (
  8	"bytes"
  9	"fmt"
 10	"net/http"
 11	"net/url"
 12	"regexp"
 13	"strconv"
 14	"strings"
 15)
 16
 17type routeRegexpOptions struct {
 18	strictSlash    bool
 19	useEncodedPath bool
 20}
 21
 22type regexpType int
 23
 24const (
 25	regexpTypePath   regexpType = 0
 26	regexpTypeHost   regexpType = 1
 27	regexpTypePrefix regexpType = 2
 28	regexpTypeQuery  regexpType = 3
 29)
 30
 31// newRouteRegexp parses a route template and returns a routeRegexp,
 32// used to match a host, a path or a query string.
 33//
 34// It will extract named variables, assemble a regexp to be matched, create
 35// a "reverse" template to build URLs and compile regexps to validate variable
 36// values used in URL building.
 37//
 38// Previously we accepted only Python-like identifiers for variable
 39// names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
 40// name and pattern can't be empty, and names can't contain a colon.
 41func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
 42	// Check if it is well-formed.
 43	idxs, errBraces := braceIndices(tpl)
 44	if errBraces != nil {
 45		return nil, errBraces
 46	}
 47	// Backup the original.
 48	template := tpl
 49	// Now let's parse it.
 50	defaultPattern := "[^/]+"
 51	if typ == regexpTypeQuery {
 52		defaultPattern = ".*"
 53	} else if typ == regexpTypeHost {
 54		defaultPattern = "[^.]+"
 55	}
 56	// Only match strict slash if not matching
 57	if typ != regexpTypePath {
 58		options.strictSlash = false
 59	}
 60	// Set a flag for strictSlash.
 61	endSlash := false
 62	if options.strictSlash && strings.HasSuffix(tpl, "/") {
 63		tpl = tpl[:len(tpl)-1]
 64		endSlash = true
 65	}
 66	varsN := make([]string, len(idxs)/2)
 67	varsR := make([]*regexp.Regexp, len(idxs)/2)
 68	pattern := bytes.NewBufferString("")
 69	pattern.WriteByte('^')
 70	reverse := bytes.NewBufferString("")
 71	var end int
 72	var err error
 73	for i := 0; i < len(idxs); i += 2 {
 74		// Set all values we are interested in.
 75		raw := tpl[end:idxs[i]]
 76		end = idxs[i+1]
 77		parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
 78		name := parts[0]
 79		patt := defaultPattern
 80		if len(parts) == 2 {
 81			patt = parts[1]
 82		}
 83		// Name or pattern can't be empty.
 84		if name == "" || patt == "" {
 85			return nil, fmt.Errorf("mux: missing name or pattern in %q",
 86				tpl[idxs[i]:end])
 87		}
 88		// Build the regexp pattern.
 89		fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
 90
 91		// Build the reverse template.
 92		fmt.Fprintf(reverse, "%s%%s", raw)
 93
 94		// Append variable name and compiled pattern.
 95		varsN[i/2] = name
 96		varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
 97		if err != nil {
 98			return nil, err
 99		}
100	}
101	// Add the remaining.
102	raw := tpl[end:]
103	pattern.WriteString(regexp.QuoteMeta(raw))
104	if options.strictSlash {
105		pattern.WriteString("[/]?")
106	}
107	if typ == regexpTypeQuery {
108		// Add the default pattern if the query value is empty
109		if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
110			pattern.WriteString(defaultPattern)
111		}
112	}
113	if typ != regexpTypePrefix {
114		pattern.WriteByte('$')
115	}
116	reverse.WriteString(raw)
117	if endSlash {
118		reverse.WriteByte('/')
119	}
120	// Compile full regexp.
121	reg, errCompile := regexp.Compile(pattern.String())
122	if errCompile != nil {
123		return nil, errCompile
124	}
125
126	// Check for capturing groups which used to work in older versions
127	if reg.NumSubexp() != len(idxs)/2 {
128		panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
129			"Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
130	}
131
132	// Done!
133	return &routeRegexp{
134		template:   template,
135		regexpType: typ,
136		options:    options,
137		regexp:     reg,
138		reverse:    reverse.String(),
139		varsN:      varsN,
140		varsR:      varsR,
141	}, nil
142}
143
144// routeRegexp stores a regexp to match a host or path and information to
145// collect and validate route variables.
146type routeRegexp struct {
147	// The unmodified template.
148	template string
149	// The type of match
150	regexpType regexpType
151	// Options for matching
152	options routeRegexpOptions
153	// Expanded regexp.
154	regexp *regexp.Regexp
155	// Reverse template.
156	reverse string
157	// Variable names.
158	varsN []string
159	// Variable regexps (validators).
160	varsR []*regexp.Regexp
161}
162
163// Match matches the regexp against the URL host or path.
164func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
165	if r.regexpType != regexpTypeHost {
166		if r.regexpType == regexpTypeQuery {
167			return r.matchQueryString(req)
168		}
169		path := req.URL.Path
170		if r.options.useEncodedPath {
171			path = req.URL.EscapedPath()
172		}
173		return r.regexp.MatchString(path)
174	}
175
176	return r.regexp.MatchString(getHost(req))
177}
178
179// url builds a URL part using the given values.
180func (r *routeRegexp) url(values map[string]string) (string, error) {
181	urlValues := make([]interface{}, len(r.varsN))
182	for k, v := range r.varsN {
183		value, ok := values[v]
184		if !ok {
185			return "", fmt.Errorf("mux: missing route variable %q", v)
186		}
187		if r.regexpType == regexpTypeQuery {
188			value = url.QueryEscape(value)
189		}
190		urlValues[k] = value
191	}
192	rv := fmt.Sprintf(r.reverse, urlValues...)
193	if !r.regexp.MatchString(rv) {
194		// The URL is checked against the full regexp, instead of checking
195		// individual variables. This is faster but to provide a good error
196		// message, we check individual regexps if the URL doesn't match.
197		for k, v := range r.varsN {
198			if !r.varsR[k].MatchString(values[v]) {
199				return "", fmt.Errorf(
200					"mux: variable %q doesn't match, expected %q", values[v],
201					r.varsR[k].String())
202			}
203		}
204	}
205	return rv, nil
206}
207
208// getURLQuery returns a single query parameter from a request URL.
209// For a URL with foo=bar&baz=ding, we return only the relevant key
210// value pair for the routeRegexp.
211func (r *routeRegexp) getURLQuery(req *http.Request) string {
212	if r.regexpType != regexpTypeQuery {
213		return ""
214	}
215	templateKey := strings.SplitN(r.template, "=", 2)[0]
216	for key, vals := range req.URL.Query() {
217		if key == templateKey && len(vals) > 0 {
218			return key + "=" + vals[0]
219		}
220	}
221	return ""
222}
223
224func (r *routeRegexp) matchQueryString(req *http.Request) bool {
225	return r.regexp.MatchString(r.getURLQuery(req))
226}
227
// braceIndices locates the outermost "{...}" groups in s. The result is a
// flat list of index pairs: for each group, the position of its opening
// brace followed by the position just past its closing brace. Nested braces
// are allowed and belong to the enclosing group. An error is returned when
// the braces do not balance.
func braceIndices(s string) ([]int, error) {
	var (
		depth int
		open  int
		pairs []int
	)
	for pos := 0; pos < len(s); pos++ {
		c := s[pos]
		if c == '{' {
			depth++
			if depth == 1 {
				// Remember where the outermost group starts.
				open = pos
			}
			continue
		}
		if c != '}' {
			continue
		}
		depth--
		if depth < 0 {
			return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
		}
		if depth == 0 {
			pairs = append(pairs, open, pos+1)
		}
	}
	if depth != 0 {
		return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
	}
	return pairs, nil
}
252
// varGroupName returns the name of the regexp capturing group used for the
// variable at position idx: the letter "v" followed by the decimal index.
func varGroupName(idx int) string {
	const prefix = "v"
	suffix := strconv.Itoa(idx)
	return prefix + suffix
}
257
258// ----------------------------------------------------------------------------
259// routeRegexpGroup
260// ----------------------------------------------------------------------------
261
262// routeRegexpGroup groups the route matchers that carry variables.
263type routeRegexpGroup struct {
264	host    *routeRegexp
265	path    *routeRegexp
266	queries []*routeRegexp
267}
268
269// setMatch extracts the variables from the URL once a route matches.
270func (v *routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
271	// Store host variables.
272	if v.host != nil {
273		host := getHost(req)
274		matches := v.host.regexp.FindStringSubmatchIndex(host)
275		if len(matches) > 0 {
276			extractVars(host, matches, v.host.varsN, m.Vars)
277		}
278	}
279	path := req.URL.Path
280	if r.useEncodedPath {
281		path = req.URL.EscapedPath()
282	}
283	// Store path variables.
284	if v.path != nil {
285		matches := v.path.regexp.FindStringSubmatchIndex(path)
286		if len(matches) > 0 {
287			extractVars(path, matches, v.path.varsN, m.Vars)
288			// Check if we should redirect.
289			if v.path.options.strictSlash {
290				p1 := strings.HasSuffix(path, "/")
291				p2 := strings.HasSuffix(v.path.template, "/")
292				if p1 != p2 {
293					u, _ := url.Parse(req.URL.String())
294					if p1 {
295						u.Path = u.Path[:len(u.Path)-1]
296					} else {
297						u.Path += "/"
298					}
299					m.Handler = http.RedirectHandler(u.String(), 301)
300				}
301			}
302		}
303	}
304	// Store query string variables.
305	for _, q := range v.queries {
306		queryURL := q.getURLQuery(req)
307		matches := q.regexp.FindStringSubmatchIndex(queryURL)
308		if len(matches) > 0 {
309			extractVars(queryURL, matches, q.varsN, m.Vars)
310		}
311	}
312}
313
// getHost tries its best to return the request host.
//
// For a request with an absolute URL the URL's host is returned as-is.
// Otherwise the Host field of the request is used with any port removed.
// A bracketed IPv6 literal such as "[::1]:8080" keeps its brackets and
// address ("[::1]"); only the port is dropped.
func getHost(r *http.Request) string {
	if r.URL.IsAbs() {
		return r.URL.Host
	}
	host := r.Host
	// The colons inside a bracketed IPv6 literal are part of the address,
	// not a port separator, so cut after the closing bracket instead.
	if strings.HasPrefix(host, "[") {
		if end := strings.Index(host, "]"); end != -1 {
			return host[:end+1]
		}
	}
	// Slice off any port information.
	if i := strings.Index(host, ":"); i != -1 {
		host = host[:i]
	}
	return host
}
327
// extractVars copies the submatches of input into output, keyed by variable
// name. matches is a submatch index list in the layout produced by
// FindStringSubmatchIndex: the first pair spans the whole match and the pair
// for the i-th name follows at positions 2*i+2 and 2*i+3.
func extractVars(input string, matches []int, names []string, output map[string]string) {
	for i := 0; i < len(names); i++ {
		start, stop := matches[2*i+2], matches[2*i+3]
		output[names[i]] = input[start:stop]
	}
}