1// Copyright 2018 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5// Package module defines the module.Version type
6// along with support code.
7package module
8
9// IMPORTANT NOTE
10//
11// This file essentially defines the set of valid import paths for the go command.
12// There are many subtle considerations, including Unicode ambiguity,
13// security, network, and file system representations.
14//
15// This file also defines the set of valid module path and version combinations,
16// another topic with many subtle considerations.
17//
18// Changes to the semantics in this file require approval from rsc.
19
20import (
21 "fmt"
22 "sort"
23 "strings"
24 "unicode"
25 "unicode/utf8"
26
27 "golang.org/x/tools/internal/semver"
28)
29
// Version identifies a module by its path together with a version string.
type Version struct {
	// Path is the module path.
	Path string

	// Version is normally a semantic version in canonical form.
	// Two special values exist: the top-level target of a build has
	// no specific version and is recorded with Version = "", and MVS
	// calculations use the version "none" to record the decision to
	// take no version of a given module.
	// The field is omitted from JSON output when it is empty.
	Version string `json:",omitempty"`
}
42
43// Check checks that a given module path, version pair is valid.
44// In addition to the path being a valid module path
45// and the version being a valid semantic version,
46// the two must correspond.
47// For example, the path "yaml/v2" only corresponds to
48// semantic versions beginning with "v2.".
49func Check(path, version string) error {
50 if err := CheckPath(path); err != nil {
51 return err
52 }
53 if !semver.IsValid(version) {
54 return fmt.Errorf("malformed semantic version %v", version)
55 }
56 _, pathMajor, _ := SplitPathVersion(path)
57 if !MatchPathMajor(version, pathMajor) {
58 if pathMajor == "" {
59 pathMajor = "v0 or v1"
60 }
61 if pathMajor[0] == '.' { // .v1
62 pathMajor = pathMajor[1:]
63 }
64 return fmt.Errorf("mismatched module path %v and version %v (want %v)", path, version, pathMajor)
65 }
66 return nil
67}
68
// firstPathOK reports whether r may appear in the first element of a
// module path. For now that element must look like an LDH
// (letter-digit-hyphen) domain name, so only ASCII lower-case letters,
// ASCII digits, '-' and '.' are accepted. Upper-case letters are
// rejected to avoid case ambiguity.
func firstPathOK(r rune) bool {
	switch {
	case 'a' <= r && r <= 'z':
		return true
	case '0' <= r && r <= '9':
		return true
	case r == '-', r == '.':
		return true
	}
	return false
}
77
// pathOK reports whether r may appear in an import path element.
// Only ASCII is accepted: letters, digits, and the punctuation
// characters + - . _ and ~. This matches what "go get" has
// historically recognized in import paths.
// TODO(rsc): We would like to allow Unicode letters, but that requires
// additional care in the safe encoding (see the "Safe encodings" note
// in this file).
func pathOK(r rune) bool {
	if r >= utf8.RuneSelf {
		return false
	}
	switch {
	case 'a' <= r && r <= 'z', 'A' <= r && r <= 'Z', '0' <= r && r <= '9':
		return true
	}
	switch r {
	case '+', '-', '.', '_', '~':
		return true
	}
	return false
}
92
// fileNameOK reports whether r may appear in a file name.
// ASCII letters and digits are accepted together with a fixed set of
// ASCII punctuation (including space); outside ASCII, any Unicode
// letter is accepted.
// If the set of allowed characters is ever expanded, more work is
// needed to detect potential case-folding and normalization
// collisions; see the "Safe encodings" note in this file.
func fileNameOK(r rune) bool {
	if r >= utf8.RuneSelf {
		// Non-ASCII: letters only.
		return unicode.IsLetter(r)
	}

	switch {
	case '0' <= r && r <= '9', 'A' <= r && r <= 'Z', 'a' <= r && r <= 'z':
		return true
	}

	// Starting from the entire set of ASCII punctuation,
	//	! " # $ % & ' ( ) * + , - . / : ; < = > ? @ [ \ ] ^ _ ` { | } ~
	// some shell special characters are disallowed: " ' * < > ? ` |
	// (some of those are disallowed by the Windows file system as well),
	// as are the path separators / : and \ (fileNameOK is only called on
	// path element characters). Spaces (U+0020) are allowed.
	// It may be OK to add more ASCII punctuation here, but only carefully:
	// for example Windows disallows < > \, and macOS disallows :, so
	// those must never be allowed.
	const punct = "!#$%&()+,-.=@[]^_{}~ "
	return strings.ContainsRune(punct, r)
}
121
122// CheckPath checks that a module path is valid.
123func CheckPath(path string) error {
124 if err := checkPath(path, false); err != nil {
125 return fmt.Errorf("malformed module path %q: %v", path, err)
126 }
127 i := strings.Index(path, "/")
128 if i < 0 {
129 i = len(path)
130 }
131 if i == 0 {
132 return fmt.Errorf("malformed module path %q: leading slash", path)
133 }
134 if !strings.Contains(path[:i], ".") {
135 return fmt.Errorf("malformed module path %q: missing dot in first path element", path)
136 }
137 if path[0] == '-' {
138 return fmt.Errorf("malformed module path %q: leading dash in first path element", path)
139 }
140 for _, r := range path[:i] {
141 if !firstPathOK(r) {
142 return fmt.Errorf("malformed module path %q: invalid char %q in first path element", path, r)
143 }
144 }
145 if _, _, ok := SplitPathVersion(path); !ok {
146 return fmt.Errorf("malformed module path %q: invalid version", path)
147 }
148 return nil
149}
150
151// CheckImportPath checks that an import path is valid.
152func CheckImportPath(path string) error {
153 if err := checkPath(path, false); err != nil {
154 return fmt.Errorf("malformed import path %q: %v", path, err)
155 }
156 return nil
157}
158
159// checkPath checks that a general path is valid.
160// It returns an error describing why but not mentioning path.
161// Because these checks apply to both module paths and import paths,
162// the caller is expected to add the "malformed ___ path %q: " prefix.
163// fileName indicates whether the final element of the path is a file name
164// (as opposed to a directory name).
165func checkPath(path string, fileName bool) error {
166 if !utf8.ValidString(path) {
167 return fmt.Errorf("invalid UTF-8")
168 }
169 if path == "" {
170 return fmt.Errorf("empty string")
171 }
172 if strings.Contains(path, "..") {
173 return fmt.Errorf("double dot")
174 }
175 if strings.Contains(path, "//") {
176 return fmt.Errorf("double slash")
177 }
178 if path[len(path)-1] == '/' {
179 return fmt.Errorf("trailing slash")
180 }
181 elemStart := 0
182 for i, r := range path {
183 if r == '/' {
184 if err := checkElem(path[elemStart:i], fileName); err != nil {
185 return err
186 }
187 elemStart = i + 1
188 }
189 }
190 if err := checkElem(path[elemStart:], fileName); err != nil {
191 return err
192 }
193 return nil
194}
195
196// checkElem checks whether an individual path element is valid.
197// fileName indicates whether the element is a file name (not a directory name).
198func checkElem(elem string, fileName bool) error {
199 if elem == "" {
200 return fmt.Errorf("empty path element")
201 }
202 if strings.Count(elem, ".") == len(elem) {
203 return fmt.Errorf("invalid path element %q", elem)
204 }
205 if elem[0] == '.' && !fileName {
206 return fmt.Errorf("leading dot in path element")
207 }
208 if elem[len(elem)-1] == '.' {
209 return fmt.Errorf("trailing dot in path element")
210 }
211 charOK := pathOK
212 if fileName {
213 charOK = fileNameOK
214 }
215 for _, r := range elem {
216 if !charOK(r) {
217 return fmt.Errorf("invalid char %q", r)
218 }
219 }
220
221 // Windows disallows a bunch of path elements, sadly.
222 // See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
223 short := elem
224 if i := strings.Index(short, "."); i >= 0 {
225 short = short[:i]
226 }
227 for _, bad := range badWindowsNames {
228 if strings.EqualFold(bad, short) {
229 return fmt.Errorf("disallowed path element %q", elem)
230 }
231 }
232 return nil
233}
234
235// CheckFilePath checks whether a slash-separated file path is valid.
236func CheckFilePath(path string) error {
237 if err := checkPath(path, true); err != nil {
238 return fmt.Errorf("malformed file path %q: %v", path, err)
239 }
240 return nil
241}
242
// badWindowsNames lists the file path elements that are reserved on
// Windows: the console/printer/auxiliary/null devices followed by the
// numbered serial (COM) and parallel (LPT) ports.
// See https://docs.microsoft.com/en-us/windows/desktop/fileio/naming-a-file
var badWindowsNames = []string{
	"CON", "PRN", "AUX", "NUL",
	"COM1", "COM2", "COM3", "COM4", "COM5", "COM6", "COM7", "COM8", "COM9",
	"LPT1", "LPT2", "LPT3", "LPT4", "LPT5", "LPT6", "LPT7", "LPT8", "LPT9",
}
269
270// SplitPathVersion returns prefix and major version such that prefix+pathMajor == path
271// and version is either empty or "/vN" for N >= 2.
272// As a special case, gopkg.in paths are recognized directly;
273// they require ".vN" instead of "/vN", and for all N, not just N >= 2.
274func SplitPathVersion(path string) (prefix, pathMajor string, ok bool) {
275 if strings.HasPrefix(path, "gopkg.in/") {
276 return splitGopkgIn(path)
277 }
278
279 i := len(path)
280 dot := false
281 for i > 0 && ('0' <= path[i-1] && path[i-1] <= '9' || path[i-1] == '.') {
282 if path[i-1] == '.' {
283 dot = true
284 }
285 i--
286 }
287 if i <= 1 || i == len(path) || path[i-1] != 'v' || path[i-2] != '/' {
288 return path, "", true
289 }
290 prefix, pathMajor = path[:i-2], path[i-2:]
291 if dot || len(pathMajor) <= 2 || pathMajor[2] == '0' || pathMajor == "/v1" {
292 return path, "", false
293 }
294 return prefix, pathMajor, true
295}
296
// splitGopkgIn is like SplitPathVersion but only for gopkg.in paths.
//
// A gopkg.in path must end in ".vN" or ".vN-unstable" for some decimal
// N. On success it returns the text before the ".v" as prefix, the
// remainder (including any "-unstable" suffix) as pathMajor, and
// ok = true. On failure it returns (path, "", false). Failures are:
// a path that does not start with "gopkg.in/", a path with no digits
// after ".v", and a major number whose first digit is 0 unless
// pathMajor is exactly ".v0" (so ".v01" and ".v0-unstable" are
// rejected).
func splitGopkgIn(path string) (prefix, pathMajor string, ok bool) {
	if !strings.HasPrefix(path, "gopkg.in/") {
		return path, "", false
	}

	// end marks where the major number stops: the end of the path,
	// or the start of an optional "-unstable" suffix.
	end := len(strings.TrimSuffix(path, "-unstable"))
	i := end
	for i > 0 && '0' <= path[i-1] && path[i-1] <= '9' {
		i--
	}

	// All gopkg.in paths must end in vN for some N. Requiring i < end
	// guarantees at least one digit even when "-unstable" follows;
	// without it "gopkg.in/foo.v-unstable" would be accepted with the
	// numberless major ".v-unstable".
	if i == end || i <= 1 || path[i-1] != 'v' || path[i-2] != '.' {
		return path, "", false
	}

	prefix, pathMajor = path[:i-2], path[i-2:]
	// pathMajor is ".v" followed by at least one digit here, so
	// indexing position 2 is safe. Reject leading zeros other than
	// the plain ".v0".
	if pathMajor[2] == '0' && pathMajor != ".v0" {
		return path, "", false
	}
	return prefix, pathMajor, true
}
319
320// MatchPathMajor reports whether the semantic version v
321// matches the path major version pathMajor.
322func MatchPathMajor(v, pathMajor string) bool {
323 if strings.HasPrefix(pathMajor, ".v") && strings.HasSuffix(pathMajor, "-unstable") {
324 pathMajor = strings.TrimSuffix(pathMajor, "-unstable")
325 }
326 if strings.HasPrefix(v, "v0.0.0-") && pathMajor == ".v1" {
327 // Allow old bug in pseudo-versions that generated v0.0.0- pseudoversion for gopkg .v1.
328 // For example, gopkg.in/yaml.v2@v2.2.1's go.mod requires gopkg.in/check.v1 v0.0.0-20161208181325-20d25e280405.
329 return true
330 }
331 m := semver.Major(v)
332 if pathMajor == "" {
333 return m == "v0" || m == "v1" || semver.Build(v) == "+incompatible"
334 }
335 return (pathMajor[0] == '/' || pathMajor[0] == '.') && m == pathMajor[1:]
336}
337
338// CanonicalVersion returns the canonical form of the version string v.
339// It is the same as semver.Canonical(v) except that it preserves the special build suffix "+incompatible".
340func CanonicalVersion(v string) string {
341 cv := semver.Canonical(v)
342 if semver.Build(v) == "+incompatible" {
343 cv += "+incompatible"
344 }
345 return cv
346}
347
348// Sort sorts the list by Path, breaking ties by comparing Versions.
349func Sort(list []Version) {
350 sort.Slice(list, func(i, j int) bool {
351 mi := list[i]
352 mj := list[j]
353 if mi.Path != mj.Path {
354 return mi.Path < mj.Path
355 }
356 // To help go.sum formatting, allow version/file.
357 // Compare semver prefix by semver rules,
358 // file by string order.
359 vi := mi.Version
360 vj := mj.Version
361 var fi, fj string
362 if k := strings.Index(vi, "/"); k >= 0 {
363 vi, fi = vi[:k], vi[k:]
364 }
365 if k := strings.Index(vj, "/"); k >= 0 {
366 vj, fj = vj[:k], vj[k:]
367 }
368 if vi != vj {
369 return semver.Compare(vi, vj) < 0
370 }
371 return fi < fj
372 })
373}
374
375// Safe encodings
376//
377// Module paths appear as substrings of file system paths
378// (in the download cache) and of web server URLs in the proxy protocol.
379// In general we cannot rely on file systems to be case-sensitive,
380// nor can we rely on web servers, since they read from file systems.
381// That is, we cannot rely on the file system to keep rsc.io/QUOTE
382// and rsc.io/quote separate. Windows and macOS don't.
383// Instead, we must never require two different casings of a file path.
384// Because we want the download cache to match the proxy protocol,
385// and because we want the proxy protocol to be possible to serve
386// from a tree of static files (which might be stored on a case-insensitive
387// file system), the proxy protocol must never require two different casings
388// of a URL path either.
389//
390// One possibility would be to make the safe encoding be the lowercase
391// hexadecimal encoding of the actual path bytes. This would avoid ever
392// needing different casings of a file path, but it would be fairly illegible
393// to most programmers when those paths appeared in the file system
// (including in file paths in compiler errors and stack traces),
// in web server logs, and so on. Instead, we want a safe encoding that
396// leaves most paths unaltered.
397//
398// The safe encoding is this:
399// replace every uppercase letter with an exclamation mark
400// followed by the letter's lowercase equivalent.
401//
// For example:
//
//	github.com/Azure/azure-sdk-for-go -> github.com/!azure/azure-sdk-for-go
//	github.com/GoogleCloudPlatform/cloudsql-proxy -> github.com/!google!cloud!platform/cloudsql-proxy
//	github.com/Sirupsen/logrus -> github.com/!sirupsen/logrus
406//
407// Import paths that avoid upper-case letters are left unchanged.
408// Note that because import paths are ASCII-only and avoid various
409// problematic punctuation (like : < and >), the safe encoding is also ASCII-only
410// and avoids the same problematic punctuation.
411//
412// Import paths have never allowed exclamation marks, so there is no
413// need to define how to encode a literal !.
414//
415// Although paths are disallowed from using Unicode (see pathOK above),
416// the eventual plan is to allow Unicode letters as well, to assume that
417// file systems and URLs are Unicode-safe (storing UTF-8), and apply
418// the !-for-uppercase convention. Note however that not all runes that
419// are different but case-fold equivalent are an upper/lower pair.
420// For example, U+004B ('K'), U+006B ('k'), and U+212A ('K' for Kelvin)
421// are considered to case-fold to each other. When we do add Unicode
422// letters, we must not assume that upper/lower are the only case-equivalent pairs.
423// Perhaps the Kelvin symbol would be disallowed entirely, for example.
424// Or perhaps it would encode as "!!k", or perhaps as "(212A)".
425//
426// Also, it would be nice to allow Unicode marks as well as letters,
427// but marks include combining marks, and then we must deal not
428// only with case folding but also normalization: both U+00E9 ('é')
429// and U+0065 U+0301 ('e' followed by combining acute accent)
430// look the same on the page and are treated by some file systems
431// as the same path. If we do allow Unicode marks in paths, there
432// must be some kind of normalization to allow only one canonical
433// encoding of any character used in an import path.
434
435// EncodePath returns the safe encoding of the given module path.
436// It fails if the module path is invalid.
437func EncodePath(path string) (encoding string, err error) {
438 if err := CheckPath(path); err != nil {
439 return "", err
440 }
441
442 return encodeString(path)
443}
444
445// EncodeVersion returns the safe encoding of the given module version.
446// Versions are allowed to be in non-semver form but must be valid file names
447// and not contain exclamation marks.
448func EncodeVersion(v string) (encoding string, err error) {
449 if err := checkElem(v, true); err != nil || strings.Contains(v, "!") {
450 return "", fmt.Errorf("disallowed version string %q", v)
451 }
452 return encodeString(v)
453}
454
// encodeString applies the safe encoding to s: every ASCII upper-case
// letter is replaced by '!' followed by its lower-case equivalent, and
// all other bytes are copied unchanged. A string with no upper-case
// letters is returned as is.
// It returns an error if s contains '!' or any non-ASCII byte, because
// the encoding is only well defined without them.
func encodeString(s string) (encoding string, err error) {
	upper := 0
	for i := 0; i < len(s); i++ {
		c := s[i]
		switch {
		case c == '!' || c >= utf8.RuneSelf:
			// This should be disallowed by CheckPath, but diagnose anyway.
			// The correctness of the encoding loop below depends on it.
			return "", fmt.Errorf("internal error: inconsistency in EncodePath")
		case 'A' <= c && c <= 'Z':
			upper++
		}
	}
	if upper == 0 {
		return s, nil
	}

	// Each upper-case letter grows by exactly one byte.
	out := make([]byte, 0, len(s)+upper)
	for i := 0; i < len(s); i++ {
		c := s[i]
		if 'A' <= c && c <= 'Z' {
			out = append(out, '!', c-'A'+'a')
			continue
		}
		out = append(out, c)
	}
	return string(out), nil
}
482
483// DecodePath returns the module path of the given safe encoding.
484// It fails if the encoding is invalid or encodes an invalid path.
485func DecodePath(encoding string) (path string, err error) {
486 path, ok := decodeString(encoding)
487 if !ok {
488 return "", fmt.Errorf("invalid module path encoding %q", encoding)
489 }
490 if err := CheckPath(path); err != nil {
491 return "", fmt.Errorf("invalid module path encoding %q: %v", encoding, err)
492 }
493 return path, nil
494}
495
496// DecodeVersion returns the version string for the given safe encoding.
497// It fails if the encoding is invalid or encodes an invalid version.
498// Versions are allowed to be in non-semver form but must be valid file names
499// and not contain exclamation marks.
500func DecodeVersion(encoding string) (v string, err error) {
501 v, ok := decodeString(encoding)
502 if !ok {
503 return "", fmt.Errorf("invalid version encoding %q", encoding)
504 }
505 if err := checkElem(v, true); err != nil {
506 return "", fmt.Errorf("disallowed version string %q", v)
507 }
508 return v, nil
509}
510
// decodeString reverses the safe encoding: each '!' followed by an
// ASCII lower-case letter becomes the corresponding upper-case letter,
// and every other byte is copied unchanged.
// It reports false, with an empty string, when the input is not a
// valid encoding: a non-ASCII byte, a literal upper-case letter, a '!'
// not followed by a lower-case letter, or a trailing '!'.
func decodeString(encoding string) (string, bool) {
	var out []byte

	escaped := false // true right after an unconsumed '!'
	for i := 0; i < len(encoding); i++ {
		c := encoding[i]
		switch {
		case c >= utf8.RuneSelf:
			return "", false
		case escaped:
			if c < 'a' || 'z' < c {
				return "", false
			}
			out = append(out, c-'a'+'A')
			escaped = false
		case c == '!':
			escaped = true
		case 'A' <= c && c <= 'Z':
			// Upper-case letters may only appear in escaped form.
			return "", false
		default:
			out = append(out, c)
		}
	}
	if escaped {
		return "", false
	}
	return string(out), true
}