module.go

  1// Copyright 2018 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5// Package module defines the module.Version type
  6// along with support code.
  7package module
  8
  9// IMPORTANT NOTE
 10//
 11// This file essentially defines the set of valid import paths for the go command.
 12// There are many subtle considerations, including Unicode ambiguity,
 13// security, network, and file system representations.
 14//
 15// This file also defines the set of valid module path and version combinations,
 16// another topic with many subtle considerations.
 17//
 18// Changes to the semantics in this file require approval from rsc.
 19
 20import (
 21	"fmt"
 22	"sort"
 23	"strings"
 24	"unicode"
 25	"unicode/utf8"
 26
 27	"golang.org/x/tools/internal/semver"
 28)
 29
// A Version is defined by a module path and version pair.
type Version struct {
	// Path is the module path half of the pair.
	Path string

	// Version is usually a semantic version in canonical form.
	// There are two exceptions to this general rule.
	// First, the top-level target of a build has no specific version
	// and uses Version = "".
	// Second, during MVS calculations the version "none" is used
	// to represent the decision to take no version of a given module.
	//
	// The struct tag causes an empty Version to be left out of JSON output.
	Version string `json:",omitempty"`
}
 42
 43// Check checks that a given module path, version pair is valid.
 44// In addition to the path being a valid module path
 45// and the version being a valid semantic version,
 46// the two must correspond.
 47// For example, the path "yaml/v2" only corresponds to
 48// semantic versions beginning with "v2.".
 49func Check(path, version string) error {
 50	if err := CheckPath(path); err != nil {
 51		return err
 52	}
 53	if !semver.IsValid(version) {
 54		return fmt.Errorf("malformed semantic version %v", version)
 55	}
 56	_, pathMajor, _ := SplitPathVersion(path)
 57	if !MatchPathMajor(version, pathMajor) {
 58		if pathMajor == "" {
 59			pathMajor = "v0 or v1"
 60		}
 61		if pathMajor[0] == '.' { // .v1
 62			pathMajor = pathMajor[1:]
 63		}
 64		return fmt.Errorf("mismatched module path %v and version %v (want %v)", path, version, pathMajor)
 65	}
 66	return nil
 67}
 68
 69// firstPathOK reports whether r can appear in the first element of a module path.
 70// The first element of the path must be an LDH domain name, at least for now.
 71// To avoid case ambiguity, the domain name must be entirely lower case.
 72func firstPathOK(r rune) bool {
 73	return r == '-' || r == '.' ||
 74		'0' <= r && r <= '9' ||
 75		'a' <= r && r <= 'z'
 76}
 77
 78// pathOK reports whether r can appear in an import path element.
 79// Paths can be ASCII letters, ASCII digits, and limited ASCII punctuation: + - . _ and ~.
 80// This matches what "go get" has historically recognized in import paths.
 81// TODO(rsc): We would like to allow Unicode letters, but that requires additional
 82// care in the safe encoding (see note below).
 83func pathOK(r rune) bool {
 84	if r < utf8.RuneSelf {
 85		return r == '+' || r == '-' || r == '.' || r == '_' || r == '~' ||
 86			'0' <= r && r <= '9' ||
 87			'A' <= r && r <= 'Z' ||
 88			'a' <= r && r <= 'z'
 89	}
 90	return false
 91}
 92
 93// fileNameOK reports whether r can appear in a file name.
 94// For now we allow all Unicode letters but otherwise limit to pathOK plus a few more punctuation characters.
 95// If we expand the set of allowed characters here, we have to
 96// work harder at detecting potential case-folding and normalization collisions.
 97// See note about "safe encoding" below.
 98func fileNameOK(r rune) bool {
 99	if r < utf8.RuneSelf {
100		// Entire set of ASCII punctuation, from which we remove characters:
101		//     ! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
102		// We disallow some shell special characters: " ' * < > ? ` |
103		// (Note that some of those are disallowed by the Windows file system as well.)
104		// We also disallow path separators / : and \ (fileNameOK is only called on path element characters).
105		// We allow spaces (U+0020) in file names.
106		const allowed = "!#$%&()+,-.=@[]^_{}~ "
107		if '0' <= r && r <= '9' || 'A' <= r && r <= 'Z' || 'a' <= r && r <= 'z' {
108			return true
109		}
110		for i := 0; i < len(allowed); i++ {
111			if rune(allowed[i]) == r {
112				return true
113			}
114		}
115		return false
116	}
117	// It may be OK to add more ASCII punctuation here, but only carefully.
118	// For example Windows disallows < > \, and macOS disallows :, so we must not allow those.
119	return unicode.IsLetter(r)
120}
121
122// CheckPath checks that a module path is valid.
123func CheckPath(path string) error {
124	if err := checkPath(path, false); err != nil {
125		return fmt.Errorf("malformed module path %q: %v", path, err)
126	}
127	i := strings.Index(path, "/")
128	if i < 0 {
129		i = len(path)
130	}
131	if i == 0 {
132		return fmt.Errorf("malformed module path %q: leading slash", path)
133	}
134	if !strings.Contains(path[:i], ".") {
135		return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
136	}
137	if path[0] == '-' {
138		return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
139	}
140	for _, r := range path[:i] {
141		if !firstPathOK(r) {
142			return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
143		}
144	}
145	if _, _, ok := SplitPathVersion(path); !ok {
146		return fmt.Errorf("malformed module path %q: invalid version", path)
147	}
148	return nil
149}
150
151// CheckImportPath checks that an import path is valid.
152func CheckImportPath(path string) error {
153	if err := checkPath(path, false); err != nil {
154		return fmt.Errorf("malformed import path %q: %v", path, err)
155	}
156	return nil
157}
158
159// checkPath checks that a general path is valid.
160// It returns an error describing why but not mentioning path.
161// Because these checks apply to both module paths and import paths,
162// the caller is expected to add the "malformed ___ path %q: " prefix.
163// fileName indicates whether the final element of the path is a file name
164// (as opposed to a directory name).
165func checkPath(path string, fileName bool) error {
166	if !utf8.ValidString(path) {
167		return fmt.Errorf("invalid UTF-8")
168	}
169	if path == "" {
170		return fmt.Errorf("empty string")
171	}
172	if strings.Contains(path, "..") {
173		return fmt.Errorf("double dot")
174	}
175	if strings.Contains(path, "//") {
176		return fmt.Errorf("double slash")
177	}
178	if path[len(path)-1] == '/' {
179		return fmt.Errorf("trailing slash")
180	}
181	elemStart := 0
182	for i, r := range path {
183		if r == '/' {
184			if err := checkElem(path[elemStart:i], fileName); err != nil {
185				return err
186			}
187			elemStart = i + 1
188		}
189	}
190	if err := checkElem(path[elemStart:], fileName); err != nil {
191		return err
192	}
193	return nil
194}
195
196// checkElem checks whether an individual path element is valid.
197// fileName indicates whether the element is a file name (not a directory name).
198func checkElem(elem string, fileName bool) error {
199	if elem == "" {
200		return fmt.Errorf("empty path element")
201	}
202	if strings.Count(elem, ".") == len(elem) {
203		return fmt.Errorf("invalid path element %q", elem)
204	}
205	if elem[0] == '.' && !fileName {
206		return fmt.Errorf("leading dot in path element")
207	}
208	if elem[len(elem)-1] == '.' {
209		return fmt.Errorf("trailing dot in path element")
210	}
211	charOK := pathOK
212	if fileName {
213		charOK = fileNameOK
214	}
215	for _, r := range elem {
216		if !charOK(r) {
217			return fmt.Errorf("invalid char %q", r)
218		}
219	}
220
221	// Windows disallows a bunch of path elements, sadly.
222	// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
223	short := elem
224	if i := strings.Index(short, "."); i >= 0 {
225		short = short[:i]
226	}
227	for _, bad := range badWindowsNames {
228		if strings.EqualFold(bad, short) {
229			return fmt.Errorf("disallowed path element %q", elem)
230		}
231	}
232	return nil
233}
234
235// CheckFilePath checks whether a slash-separated file path is valid.
236func CheckFilePath(path string) error {
237	if err := checkPath(path, true); err != nil {
238		return fmt.Errorf("malformed file path %q: %v", path, err)
239	}
240	return nil
241}
242
// badWindowsNames lists the file path elements that Windows reserves,
// grouped by device family.
// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
var badWindowsNames = []string{
	"CON", "PRN", "AUX", "NUL",
	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
}
269
270// SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
271// and version is either empty or "/vN" for N >= 2.
272// As a special case, gopkg.in paths are recognized directly;
273// they require ".vN" instead of "/vN", and for all N, not just N >= 2.
274func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) {
275	if strings.HasPrefix(path, "gopkg.in/") {
276		return splitGopkgIn(path)
277	}
278
279	i := len(path)
280	dot := false
281	for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') {
282		if path[i-1] == '.' {
283			dot = true
284		}
285		i--
286	}
287	if i <= 1 || i == len(path) || path[i-1] != 'v' || path[i-2] != '/' {
288		return path, "", true
289	}
290	prefix, pathMajor = path[:i-2], path[i-2:]
291	if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" {
292		return path, "", false
293	}
294	return prefix, pathMajor, true
295}
296
// splitGopkgIn is like SplitPathVersion but only for gopkg.in paths.
//
// A valid gopkg.in path ends in ".vN", optionally followed by "-unstable",
// where N is "0" or a decimal number without a leading zero. On success it
// returns the text before the suffix as prefix, the suffix (including any
// "-unstable") as pathMajor, and ok = true. On failure it returns
// (path, "", false).
//
// A ".v" with no digits is rejected even when "-unstable" follows it, so
// "gopkg.in/foo.v-unstable" is not a valid path.
func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) {
	if !strings.HasPrefix(path, "gopkg.in/") {
		return path, "", false
	}

	// digitsEnd is the index just past where the major number must end.
	digitsEnd := len(path)
	if strings.HasSuffix(path, "-unstable") {
		digitsEnd -= len("-unstable")
	}

	// Scan backward over the digits of the major number.
	i := digitsEnd
	for i > 0 && '0' <= path[i-1] && path[i-1] <= '9' {
		i--
	}
	if i == digitsEnd || i <= 1 || path[i-1] != 'v' || path[i-2] != '.' {
		// All gopkg.in paths must end in vN for some N.
		return path, "", false
	}

	prefix, pathMajor = path[:i-2], path[i-2:]
	// At least one digit follows ".v" here, so pathMajor[2] exists.
	// Leading zeros are disallowed; the only suffix starting with '0' that
	// is accepted is exactly ".v0".
	if pathMajor[2] == '0' && pathMajor != ".v0" {
		return path, "", false
	}
	return prefix, pathMajor, true
}
319
320// MatchPathMajor reports whether the semantic version v
321// matches the path major version pathMajor.
322func MatchPathMajor(v, pathMajor string) bool {
323	if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
324		pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
325	}
326	if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" {
327		// Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1.
328		// For example, gopkg.in/yaml.v2@v2.2.1's go.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405.
329		return true
330	}
331	m := semver.Major(v)
332	if pathMajor == "" {
333		return m == "v0" || m == "v1" || semver.Build(v) == "+incompatible"
334	}
335	return (pathMajor[0] == '/' || pathMajor[0] == '.') && m == pathMajor[1:]
336}
337
338// CanonicalVersion returns the canonical form of the version string v.
339// It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible".
340func CanonicalVersion(v string) string {
341	cv := semver.Canonical(v)
342	if semver.Build(v) == "+incompatible" {
343		cv += "+incompatible"
344	}
345	return cv
346}
347
348// Sort sorts the list by Path, breaking ties by comparing Versions.
349func Sort(list []Version) {
350	sort.Slice(list, func(i, j int) bool {
351		mi := list[i]
352		mj := list[j]
353		if mi.Path != mj.Path {
354			return mi.Path < mj.Path
355		}
356		// To help go.sum formatting, allow version/file.
357		// Compare semver prefix by semver rules,
358		// file by string order.
359		vi := mi.Version
360		vj := mj.Version
361		var fi, fj string
362		if k := strings.Index(vi, "/"); k >= 0 {
363			vi, fi = vi[:k], vi[k:]
364		}
365		if k := strings.Index(vj, "/"); k >= 0 {
366			vj, fj = vj[:k], vj[k:]
367		}
368		if vi != vj {
369			return semver.Compare(vi, vj) < 0
370		}
371		return fi < fj
372	})
373}
374
375// Safe encodings
376//
377// Module paths appear as substrings of file system paths
378// (in the download cache) and of web server URLs in the proxy protocol.
379// In general we cannot rely on file systems to be case-sensitive,
380// nor can we rely on web servers, since they read from file systems.
381// That is, we cannot rely on the file system to keep rsc.io/QUOTE
382// and rsc.io/quote separate. Windows and macOS don't.
383// Instead, we must never require two different casings of a file path.
384// Because we want the download cache to match the proxy protocol,
385// and because we want the proxy protocol to be possible to serve
386// from a tree of static files (which might be stored on a case-insensitive
387// file system), the proxy protocol must never require two different casings
388// of a URL path either.
389//
390// One possibility would be to make the safe encoding be the lowercase
391// hexadecimal encoding of the actual path bytes. This would avoid ever
392// needing different casings of a file path, but it would be fairly illegible
393// to most programmers when those paths appeared in the file system
394// (including in file paths in compiler errors and stack traces)
395// in web server logs, and so on. Instead, we want a safe encoding that
396// leaves most paths unaltered.
397//
398// The safe encoding is this:
399// replace every uppercase letter with an exclamation mark
400// followed by the letter's lowercase equivalent.
401//
402// For example,
403// github.com/Azure/azure-sdk-for-go ->  github.com/!azure/azure-sdk-for-go.
404// github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
405// github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus.
406//
407// Import paths that avoid upper-case letters are left unchanged.
408// Note that because import paths are ASCII-only and avoid various
409// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
410// and avoids the same problematic punctuation.
411//
412// Import paths have never allowed exclamation marks, so there is no
413// need to define how to encode a literal !.
414//
415// Although paths are disallowed from using Unicode (see pathOK above),
416// the eventual plan is to allow Unicode letters as well, to assume that
417// file systems and URLs are Unicode-safe (storing UTF-8), and apply
418// the !-for-uppercase convention. Note however that not all runes that
419// are different but case-fold equivalent are an upper/lower pair.
420// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
421// are considered to case-fold to each other. When we do add Unicode
422// letters, we must not assume that upper/lower are the only case-equivalent pairs.
423// Perhaps the Kelvin symbol would be disallowed entirely, for example.
424// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
425//
426// Also, it would be nice to allow Unicode marks as well as letters,
427// but marks include combining marks, and then we must deal not
428// only with case folding but also normalization: both U+00E9 ('é')
429// and U+0065 U+0301 ('e' followed by combining acute accent)
430// look the same on the page and are treated by some file systems
431// as the same path. If we do allow Unicode marks in paths, there
432// must be some kind of normalization to allow only one canonical
433// encoding of any character used in an import path.
434
435// EncodePath returns the safe encoding of the given module path.
436// It fails if the module path is invalid.
437func EncodePath(path string) (encoding string, err error) {
438	if err := CheckPath(path); err != nil {
439		return "", err
440	}
441
442	return encodeString(path)
443}
444
445// EncodeVersion returns the safe encoding of the given module version.
446// Versions are allowed to be in non-semver form but must be valid file names
447// and not contain exclamation marks.
448func EncodeVersion(v string) (encoding string, err error) {
449	if err := checkElem(v, true); err != nil || strings.Contains(v, "!") {
450		return "", fmt.Errorf("disallowed version string %q", v)
451	}
452	return encodeString(v)
453}
454
// encodeString applies the safe encoding to s: every upper-case ASCII
// letter is replaced by '!' followed by its lower-case equivalent, and
// all other bytes are copied unchanged. A string with no upper-case
// letters is returned as is.
// It returns an error if s contains '!' or any non-ASCII byte.
func encodeString(s string) (encoding string, err error) {
	// First pass: validate and count the letters that need escaping.
	uppers := 0
	for i := 0; i < len(s); i++ {
		switch c := s[i]; {
		case c == '!' || c >= utf8.RuneSelf:
			// This should be disallowed by CheckPath, but diagnose anyway.
			// The correctness of the encoding loop below depends on it.
			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
		case 'A' <= c && c <= 'Z':
			uppers++
		}
	}
	if uppers == 0 {
		return s, nil
	}

	// Second pass: each upper-case letter grows by one byte.
	out := make([]byte, 0, len(s)+uppers)
	for i := 0; i < len(s); i++ {
		if c := s[i]; 'A' <= c && c <= 'Z' {
			out = append(out, '!', c+('a'-'A'))
		} else {
			out = append(out, c)
		}
	}
	return string(out), nil
}
482
483// DecodePath returns the module path of the given safe encoding.
484// It fails if the encoding is invalid or encodes an invalid path.
485func DecodePath(encoding string) (path string, err error) {
486	path, ok := decodeString(encoding)
487	if !ok {
488		return "", fmt.Errorf("invalid module path encoding %q", encoding)
489	}
490	if err := CheckPath(path); err != nil {
491		return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
492	}
493	return path, nil
494}
495
496// DecodeVersion returns the version string for the given safe encoding.
497// It fails if the encoding is invalid or encodes an invalid version.
498// Versions are allowed to be in non-semver form but must be valid file names
499// and not contain exclamation marks.
500func DecodeVersion(encoding string) (v string, err error) {
501	v, ok := decodeString(encoding)
502	if !ok {
503		return "", fmt.Errorf("invalid version encoding %q", encoding)
504	}
505	if err := checkElem(v, true); err != nil {
506		return "", fmt.Errorf("disallowed version string %q", v)
507	}
508	return v, nil
509}
510
// decodeString reverses the safe encoding: each '!' followed by a
// lower-case ASCII letter becomes the corresponding upper-case letter,
// and every other byte is copied unchanged.
// The boolean result is false if the input is not a valid encoding:
// it contains a non-ASCII byte, a literal upper-case letter, or a '!'
// that is not followed by a lower-case ASCII letter.
func decodeString(encoding string) (string, bool) {
	out := make([]byte, 0, len(encoding))
	for i := 0; i < len(encoding); i++ {
		c := encoding[i]
		switch {
		case c >= utf8.RuneSelf:
			return "", false
		case c == '!':
			// Consume the escaped letter right away; it must exist
			// and be in the range a-z.
			i++
			if i == len(encoding) {
				return "", false
			}
			next := encoding[i]
			if next < 'a' || 'z' < next {
				return "", false
			}
			out = append(out, next-'a'+'A')
		case 'A' <= c && c <= 'Z':
			// Upper-case letters never appear literally in an encoding.
			return "", false
		default:
			out = append(out, c)
		}
	}
	return string(out), true
}