uri.go

  1// Copyright 2023 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package protocol
  6
  7// This file declares URI, DocumentUri, and its methods.
  8//
  9// For the LSP definition of these types, see
 10// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
 11
 12import (
 13	"fmt"
 14	"net/url"
 15	"path/filepath"
 16	"strings"
 17	"unicode"
 18)
 19
// A DocumentUri is the URI of a client editor document.
//
// According to the LSP specification:
//
//	Care should be taken to handle encoding in URIs. For
//	example, some clients (such as VS Code) may encode colons
//	in drive letters while others do not. The URIs below are
//	both valid, but clients and servers should be consistent
//	with the form they use themselves to ensure the other party
//	doesn’t interpret them as distinct URIs. Clients and
//	servers should not assume that each other are encoding the
//	same way (for example a client encoding colons in drive
//	letters cannot assume server responses will have encoded
//	colons). The same applies to casing of drive letters - one
//	party should not assume the other party will return paths
//	with drive letters cased the same as it.
//
//	file:///c:/project/readme.md
//	file:///C%3A/project/readme.md
//
// To keep a single consistent form on the server side, incoming
// values are normalized during JSON unmarshalling;
// see [DocumentUri.UnmarshalText] for details.
type DocumentUri string
 43
// A URI is an arbitrary URL (e.g. https), not necessarily a file.
//
// It is declared as an alias of string, so unlike [DocumentUri] it is
// not a distinct type and has no methods of its own.
type URI = string
 46
 47// UnmarshalText implements decoding of DocumentUri values.
 48//
 49// In particular, it implements a systematic correction of various odd
 50// features of the definition of DocumentUri in the LSP spec that
 51// appear to be workarounds for bugs in VS Code. For example, it may
 52// URI-encode the URI itself, so that colon becomes %3A, and it may
 53// send file://foo.go URIs that have two slashes (not three) and no
 54// hostname.
 55//
 56// We use UnmarshalText, not UnmarshalJSON, because it is called even
 57// for non-addressable values such as keys and values of map[K]V,
 58// where there is no pointer of type *K or *V on which to call
 59// UnmarshalJSON. (See Go issue #28189 for more detail.)
 60//
 61// Non-empty DocumentUris are valid "file"-scheme URIs.
 62// The empty DocumentUri is valid.
 63func (uri *DocumentUri) UnmarshalText(data []byte) (err error) {
 64	*uri, err = ParseDocumentUri(string(data))
 65	return
 66}
 67
 68// Path returns the file path for the given URI.
 69//
 70// DocumentUri("").Path() returns the empty string.
 71//
 72// Path panics if called on a URI that is not a valid filename.
 73func (uri DocumentUri) Path() string {
 74	filename, err := filename(uri)
 75	if err != nil {
 76		// e.g. ParseRequestURI failed.
 77		//
 78		// This can only affect DocumentUris created by
 79		// direct string manipulation; all DocumentUris
 80		// received from the client pass through
 81		// ParseRequestURI, which ensures validity.
 82		panic(err)
 83	}
 84	return filepath.FromSlash(filename)
 85}
 86
 87// Dir returns the URI for the directory containing the receiver.
 88func (uri DocumentUri) Dir() DocumentUri {
 89	// This function could be more efficiently implemented by avoiding any call
 90	// to Path(), but at least consolidates URI manipulation.
 91	return URIFromPath(uri.DirPath())
 92}
 93
 94// DirPath returns the file path to the directory containing this URI, which
 95// must be a file URI.
 96func (uri DocumentUri) DirPath() string {
 97	return filepath.Dir(uri.Path())
 98}
 99
100func filename(uri DocumentUri) (string, error) {
101	if uri == "" {
102		return "", nil
103	}
104
105	// This conservative check for the common case
106	// of a simple non-empty absolute POSIX filename
107	// avoids the allocation of a net.URL.
108	if strings.HasPrefix(string(uri), "file:///") {
109		rest := string(uri)[len("file://"):] // leave one slash
110		for i := range len(rest) {
111			b := rest[i]
112			// Reject these cases:
113			if b < ' ' || b == 0x7f || // control character
114				b == '%' || b == '+' || // URI escape
115				b == ':' || // Windows drive letter
116				b == '@' || b == '&' || b == '?' { // authority or query
117				goto slow
118			}
119		}
120		return rest, nil
121	}
122slow:
123
124	u, err := url.ParseRequestURI(string(uri))
125	if err != nil {
126		return "", err
127	}
128	if u.Scheme != fileScheme {
129		return "", fmt.Errorf("only file URIs are supported, got %q from %q", u.Scheme, uri)
130	}
131	// If the URI is a Windows URI, we trim the leading "/" and uppercase
132	// the drive letter, which will never be case sensitive.
133	if isWindowsDriveURIPath(u.Path) {
134		u.Path = strings.ToUpper(string(u.Path[1])) + u.Path[2:]
135	}
136
137	return u.Path, nil
138}
139
140// ParseDocumentUri interprets a string as a DocumentUri, applying VS
141// Code workarounds; see [DocumentUri.UnmarshalText] for details.
142func ParseDocumentUri(s string) (DocumentUri, error) {
143	if s == "" {
144		return "", nil
145	}
146
147	if !strings.HasPrefix(s, "file://") {
148		return "", fmt.Errorf("DocumentUri scheme is not 'file': %s", s)
149	}
150
151	// VS Code sends URLs with only two slashes,
152	// which are invalid. golang/go#39789.
153	if !strings.HasPrefix(s, "file:///") {
154		s = "file:///" + s[len("file://"):]
155	}
156
157	// Even though the input is a URI, it may not be in canonical form. VS Code
158	// in particular over-escapes :, @, etc. Unescape and re-encode to canonicalize.
159	path, err := url.PathUnescape(s[len("file://"):])
160	if err != nil {
161		return "", err
162	}
163
164	// File URIs from Windows may have lowercase drive letters.
165	// Since drive letters are guaranteed to be case insensitive,
166	// we change them to uppercase to remain consistent.
167	// For example, file:///c:/x/y/z becomes file:///C:/x/y/z.
168	if isWindowsDriveURIPath(path) {
169		path = path[:1] + strings.ToUpper(string(path[1])) + path[2:]
170	}
171	u := url.URL{Scheme: fileScheme, Path: path}
172	return DocumentUri(u.String()), nil
173}
174
175// URIFromPath returns DocumentUri for the supplied file path.
176// Given "", it returns "".
177func URIFromPath(path string) DocumentUri {
178	if path == "" {
179		return ""
180	}
181	if !isWindowsDrivePath(path) {
182		if abs, err := filepath.Abs(path); err == nil {
183			path = abs
184		}
185	}
186	// Check the file path again, in case it became absolute.
187	if isWindowsDrivePath(path) {
188		path = "/" + strings.ToUpper(string(path[0])) + path[1:]
189	}
190	path = filepath.ToSlash(path)
191	u := url.URL{
192		Scheme: fileScheme,
193		Path:   path,
194	}
195	return DocumentUri(u.String())
196}
197
// fileScheme is the URI scheme given to DocumentUris built by
// ParseDocumentUri and URIFromPath, and the only scheme that
// filename accepts.
const fileScheme = "file"
199
// isWindowsDrivePath reports whether the file path is of the form
// used by Windows: an ASCII drive letter followed by a ":" and at
// least one more byte. For example: C:/x/y/z.
//
// Paths shorter than three bytes (such as "C:") are not considered
// drive paths.
func isWindowsDrivePath(path string) bool {
	if len(path) < 3 {
		return false
	}
	// Only ASCII letters can be drive letters. Without the ASCII
	// guard, a raw byte >= 0x80 would be widened to a Latin-1 rune
	// (0xE9 becomes 'é') and wrongly accepted by unicode.IsLetter.
	drive := path[0]
	return drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && path[1] == ':'
}
209
// isWindowsDriveURIPath reports whether the URI path is of the form
// used by Windows file URIs. The net/url package does not specially
// handle Windows paths (see golang/go#6027), so we check whether the
// URI path has a drive prefix (e.g. "/C:") followed by at least one
// more byte.
func isWindowsDriveURIPath(uri string) bool {
	if len(uri) < 4 {
		return false
	}
	// Only ASCII letters can be drive letters. Without the ASCII
	// guard, a raw byte >= 0x80 would be widened to a Latin-1 rune
	// (0xE9 becomes 'é') and wrongly accepted by unicode.IsLetter.
	drive := uri[1]
	return uri[0] == '/' && drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && uri[2] == ':'
}