regexp.go

  1// Copyright 2012 The Gorilla Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package mux
  6
  7import (
  8	"bytes"
  9	"fmt"
 10	"net/http"
 11	"net/url"
 12	"regexp"
 13	"strconv"
 14	"strings"
 15)
 16
// routeRegexpOptions carries the matching options that are handed to
// newRouteRegexp and stored on the resulting routeRegexp.
type routeRegexpOptions struct {
	// strictSlash, for path templates, makes the compiled pattern accept an
	// optional trailing slash and lets setMatch install a redirect when the
	// request's trailing slash differs from the template's. newRouteRegexp
	// forces it to false for every other regexp type.
	strictSlash    bool
	// useEncodedPath makes path matching use req.URL.EscapedPath() instead
	// of req.URL.Path.
	useEncodedPath bool
}
 21
 22type regexpType int
 23
 24const (
 25	regexpTypePath   regexpType = 0
 26	regexpTypeHost   regexpType = 1
 27	regexpTypePrefix regexpType = 2
 28	regexpTypeQuery  regexpType = 3
 29)
 30
 31// newRouteRegexp parses a route template and returns a routeRegexp,
 32// used to match a host, a path or a query string.
 33//
 34// It will extract named variables, assemble a regexp to be matched, create
 35// a "reverse" template to build URLs and compile regexps to validate variable
 36// values used in URL building.
 37//
 38// Previously we accepted only Python-like identifiers for variable
 39// names ([a-zA-Z_][a-zA-Z0-9_]*), but currently the only restriction is that
 40// name and pattern can't be empty, and names can't contain a colon.
 41func newRouteRegexp(tpl string, typ regexpType, options routeRegexpOptions) (*routeRegexp, error) {
 42	// Check if it is well-formed.
 43	idxs, errBraces := braceIndices(tpl)
 44	if errBraces != nil {
 45		return nil, errBraces
 46	}
 47	// Backup the original.
 48	template := tpl
 49	// Now let's parse it.
 50	defaultPattern := "[^/]+"
 51	if typ == regexpTypeQuery {
 52		defaultPattern = ".*"
 53	} else if typ == regexpTypeHost {
 54		defaultPattern = "[^.]+"
 55	}
 56	// Only match strict slash if not matching
 57	if typ != regexpTypePath {
 58		options.strictSlash = false
 59	}
 60	// Set a flag for strictSlash.
 61	endSlash := false
 62	if options.strictSlash && strings.HasSuffix(tpl, "/") {
 63		tpl = tpl[:len(tpl)-1]
 64		endSlash = true
 65	}
 66	varsN := make([]string, len(idxs)/2)
 67	varsR := make([]*regexp.Regexp, len(idxs)/2)
 68	pattern := bytes.NewBufferString("")
 69	pattern.WriteByte('^')
 70	reverse := bytes.NewBufferString("")
 71	var end int
 72	var err error
 73	for i := 0; i < len(idxs); i += 2 {
 74		// Set all values we are interested in.
 75		raw := tpl[end:idxs[i]]
 76		end = idxs[i+1]
 77		parts := strings.SplitN(tpl[idxs[i]+1:end-1], ":", 2)
 78		name := parts[0]
 79		patt := defaultPattern
 80		if len(parts) == 2 {
 81			patt = parts[1]
 82		}
 83		// Name or pattern can't be empty.
 84		if name == "" || patt == "" {
 85			return nil, fmt.Errorf("mux: missing name or pattern in %q",
 86				tpl[idxs[i]:end])
 87		}
 88		// Build the regexp pattern.
 89		fmt.Fprintf(pattern, "%s(?P<%s>%s)", regexp.QuoteMeta(raw), varGroupName(i/2), patt)
 90
 91		// Build the reverse template.
 92		fmt.Fprintf(reverse, "%s%%s", raw)
 93
 94		// Append variable name and compiled pattern.
 95		varsN[i/2] = name
 96		varsR[i/2], err = regexp.Compile(fmt.Sprintf("^%s$", patt))
 97		if err != nil {
 98			return nil, err
 99		}
100	}
101	// Add the remaining.
102	raw := tpl[end:]
103	pattern.WriteString(regexp.QuoteMeta(raw))
104	if options.strictSlash {
105		pattern.WriteString("[/]?")
106	}
107	if typ == regexpTypeQuery {
108		// Add the default pattern if the query value is empty
109		if queryVal := strings.SplitN(template, "=", 2)[1]; queryVal == "" {
110			pattern.WriteString(defaultPattern)
111		}
112	}
113	if typ != regexpTypePrefix {
114		pattern.WriteByte('$')
115	}
116
117	var wildcardHostPort bool
118	if typ == regexpTypeHost {
119		if !strings.Contains(pattern.String(), ":") {
120			wildcardHostPort = true
121		}
122	}
123	reverse.WriteString(raw)
124	if endSlash {
125		reverse.WriteByte('/')
126	}
127	// Compile full regexp.
128	reg, errCompile := regexp.Compile(pattern.String())
129	if errCompile != nil {
130		return nil, errCompile
131	}
132
133	// Check for capturing groups which used to work in older versions
134	if reg.NumSubexp() != len(idxs)/2 {
135		panic(fmt.Sprintf("route %s contains capture groups in its regexp. ", template) +
136			"Only non-capturing groups are accepted: e.g. (?:pattern) instead of (pattern)")
137	}
138
139	// Done!
140	return &routeRegexp{
141		template:         template,
142		regexpType:       typ,
143		options:          options,
144		regexp:           reg,
145		reverse:          reverse.String(),
146		varsN:            varsN,
147		varsR:            varsR,
148		wildcardHostPort: wildcardHostPort,
149	}, nil
150}
151
// routeRegexp stores a regexp to match a host or path and information to
// collect and validate route variables. Values are built by newRouteRegexp.
type routeRegexp struct {
	// The unmodified template, exactly as passed to newRouteRegexp.
	template string
	// The type of match (path, host, path prefix or query).
	regexpType regexpType
	// Options for matching. strictSlash is always false unless regexpType
	// is regexpTypePath.
	options routeRegexpOptions
	// Expanded regexp: the template with every variable replaced by a named
	// group, anchored at the start and (except for prefixes) at the end.
	regexp *regexp.Regexp
	// Reverse template: a fmt format string with one %s per variable, used
	// by url to rebuild the URL part.
	reverse string
	// Variable names, in order of appearance in the template.
	varsN []string
	// Variable regexps (validators), one per entry of varsN, each anchored
	// with ^ and $.
	varsR []*regexp.Regexp
	// Wildcard host-port (no strict port match in hostname). Set for host
	// regexps whose pattern contains no ":"; Match then ignores the port.
	wildcardHostPort bool
}
172
173// Match matches the regexp against the URL host or path.
174func (r *routeRegexp) Match(req *http.Request, match *RouteMatch) bool {
175	if r.regexpType == regexpTypeHost {
176		host := getHost(req)
177		if r.wildcardHostPort {
178			// Don't be strict on the port match
179			if i := strings.Index(host, ":"); i != -1 {
180				host = host[:i]
181			}
182		}
183		return r.regexp.MatchString(host)
184	} else {
185		if r.regexpType == regexpTypeQuery {
186			return r.matchQueryString(req)
187		}
188		path := req.URL.Path
189		if r.options.useEncodedPath {
190			path = req.URL.EscapedPath()
191		}
192		return r.regexp.MatchString(path)
193	}
194}
195
196// url builds a URL part using the given values.
197func (r *routeRegexp) url(values map[string]string) (string, error) {
198	urlValues := make([]interface{}, len(r.varsN))
199	for k, v := range r.varsN {
200		value, ok := values[v]
201		if !ok {
202			return "", fmt.Errorf("mux: missing route variable %q", v)
203		}
204		if r.regexpType == regexpTypeQuery {
205			value = url.QueryEscape(value)
206		}
207		urlValues[k] = value
208	}
209	rv := fmt.Sprintf(r.reverse, urlValues...)
210	if !r.regexp.MatchString(rv) {
211		// The URL is checked against the full regexp, instead of checking
212		// individual variables. This is faster but to provide a good error
213		// message, we check individual regexps if the URL doesn't match.
214		for k, v := range r.varsN {
215			if !r.varsR[k].MatchString(values[v]) {
216				return "", fmt.Errorf(
217					"mux: variable %q doesn't match, expected %q", values[v],
218					r.varsR[k].String())
219			}
220		}
221	}
222	return rv, nil
223}
224
225// getURLQuery returns a single query parameter from a request URL.
226// For a URL with foo=bar&baz=ding, we return only the relevant key
227// value pair for the routeRegexp.
228func (r *routeRegexp) getURLQuery(req *http.Request) string {
229	if r.regexpType != regexpTypeQuery {
230		return ""
231	}
232	templateKey := strings.SplitN(r.template, "=", 2)[0]
233	for key, vals := range req.URL.Query() {
234		if key == templateKey && len(vals) > 0 {
235			return key + "=" + vals[0]
236		}
237	}
238	return ""
239}
240
241func (r *routeRegexp) matchQueryString(req *http.Request) bool {
242	return r.regexp.MatchString(r.getURLQuery(req))
243}
244
// braceIndices locates the outermost {...} groups of s.
//
// The result is a flat list of index pairs: for each top-level group, the
// position of its opening brace followed by the position just past its
// closing brace. Nested braces are skipped over. An error is returned when a
// closing brace has no matching opener or an opener is never closed.
func braceIndices(s string) ([]int, error) {
	var (
		pairs []int
		depth int
		open  int
	)
	for pos := 0; pos < len(s); pos++ {
		c := s[pos]
		if c == '{' {
			depth++
			if depth == 1 {
				// Remember where the outermost group starts.
				open = pos
			}
			continue
		}
		if c != '}' {
			continue
		}
		depth--
		if depth < 0 {
			return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
		}
		if depth == 0 {
			pairs = append(pairs, open, pos+1)
		}
	}
	if depth != 0 {
		return nil, fmt.Errorf("mux: unbalanced braces in %q", s)
	}
	return pairs, nil
}
269
// varGroupName returns the name of the regexp capturing group used for the
// variable at position idx: the letter "v" followed by idx in decimal.
func varGroupName(idx int) string {
	digits := strconv.FormatInt(int64(idx), 10)
	return "v" + digits
}
274
275// ----------------------------------------------------------------------------
276// routeRegexpGroup
277// ----------------------------------------------------------------------------
278
// routeRegexpGroup groups the route matchers that carry variables.
type routeRegexpGroup struct {
	// host is the host matcher; setMatch skips it when nil.
	host    *routeRegexp
	// path is the path matcher; setMatch skips it when nil.
	path    *routeRegexp
	// queries holds one matcher per query template.
	queries []*routeRegexp
}
285
286// setMatch extracts the variables from the URL once a route matches.
287func (v routeRegexpGroup) setMatch(req *http.Request, m *RouteMatch, r *Route) {
288	// Store host variables.
289	if v.host != nil {
290		host := getHost(req)
291		matches := v.host.regexp.FindStringSubmatchIndex(host)
292		if len(matches) > 0 {
293			extractVars(host, matches, v.host.varsN, m.Vars)
294		}
295	}
296	path := req.URL.Path
297	if r.useEncodedPath {
298		path = req.URL.EscapedPath()
299	}
300	// Store path variables.
301	if v.path != nil {
302		matches := v.path.regexp.FindStringSubmatchIndex(path)
303		if len(matches) > 0 {
304			extractVars(path, matches, v.path.varsN, m.Vars)
305			// Check if we should redirect.
306			if v.path.options.strictSlash {
307				p1 := strings.HasSuffix(path, "/")
308				p2 := strings.HasSuffix(v.path.template, "/")
309				if p1 != p2 {
310					u, _ := url.Parse(req.URL.String())
311					if p1 {
312						u.Path = u.Path[:len(u.Path)-1]
313					} else {
314						u.Path += "/"
315					}
316					m.Handler = http.RedirectHandler(u.String(), http.StatusMovedPermanently)
317				}
318			}
319		}
320	}
321	// Store query string variables.
322	for _, q := range v.queries {
323		queryURL := q.getURLQuery(req)
324		matches := q.regexp.FindStringSubmatchIndex(queryURL)
325		if len(matches) > 0 {
326			extractVars(queryURL, matches, q.varsN, m.Vars)
327		}
328	}
329}
330
// getHost tries its best to return the request host.
// According to section 14.23 of RFC 2616 the Host header
// can include the port number if the default value of 80 is not used.
//
// The host of an absolute request URL takes precedence; otherwise the
// request's Host field is returned.
func getHost(r *http.Request) string {
	host := r.Host
	if r.URL.IsAbs() {
		host = r.URL.Host
	}
	return host
}
340
// extractVars copies the submatches of input into output, keyed by variable
// name. matches is a submatch index list whose first pair describes the whole
// match; the pair for names[i] therefore starts at offset 2*i+2.
func extractVars(input string, matches []int, names []string, output map[string]string) {
	for idx := 0; idx < len(names); idx++ {
		lo := matches[2*idx+2]
		hi := matches[2*idx+3]
		output[names[idx]] = input[lo:hi]
	}
}