1// Copyright 2023 The Go Authors. All rights reserved.
  2// Use of this source code is governed by a BSD-style
  3// license that can be found in the LICENSE file.
  4
  5package protocol
  6
  7// This file declares URI, DocumentUri, and its methods.
  8//
  9// For the LSP definition of these types, see
 10// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
 11
 12import (
 13	"fmt"
 14	"net/url"
 15	"path/filepath"
 16	"strings"
 17	"unicode"
 18)
 19
// A DocumentURI is the URI of a client editor document.
//
// According to the LSP specification:
//
//	Care should be taken to handle encoding in URIs. For
//	example, some clients (such as VS Code) may encode colons
//	in drive letters while others do not. The URIs below are
//	both valid, but clients and servers should be consistent
//	with the form they use themselves to ensure the other party
//	doesn't interpret them as distinct URIs. Clients and
//	servers should not assume that each other are encoding the
//	same way (for example a client encoding colons in drive
//	letters cannot assume server responses will have encoded
//	colons). The same applies to casing of drive letters - one
//	party should not assume the other party will return paths
//	with drive letters cased the same as it.
//
//	file:///c:/project/readme.md
//	file:///C%3A/project/readme.md
//
// To stay consistent, DocumentURI values are converted to a single
// canonical form when they are decoded from JSON;
// see [DocumentURI.UnmarshalText] for details.
type DocumentURI string
 43
// A URI is an arbitrary URL (e.g. https), not necessarily a file.
//
// URI is declared as an alias for string, so values of the two types
// are interchangeable without conversion.
type URI = string
 46
 47// UnmarshalText implements decoding of DocumentUri values.
 48//
 49// In particular, it implements a systematic correction of various odd
 50// features of the definition of DocumentUri in the LSP spec that
 51// appear to be workarounds for bugs in VS Code. For example, it may
 52// URI-encode the URI itself, so that colon becomes %3A, and it may
 53// send file://foo.go URIs that have two slashes (not three) and no
 54// hostname.
 55//
 56// We use UnmarshalText, not UnmarshalJSON, because it is called even
 57// for non-addressable values such as keys and values of map[K]V,
 58// where there is no pointer of type *K or *V on which to call
 59// UnmarshalJSON. (See Go issue #28189 for more detail.)
 60//
 61// Non-empty DocumentUris are valid "file"-scheme URIs.
 62// The empty DocumentUri is valid.
 63func (uri *DocumentURI) UnmarshalText(data []byte) (err error) {
 64	*uri, err = ParseDocumentURI(string(data))
 65	return
 66}
 67
 68// Path returns the file path for the given URI.
 69//
 70// DocumentUri("").Path() returns the empty string.
 71//
 72// Path panics if called on a URI that is not a valid filename.
 73func (uri DocumentURI) Path() (string, error) {
 74	filename, err := filename(uri)
 75	if err != nil {
 76		// e.g. ParseRequestURI failed.
 77		//
 78		// This can only affect DocumentUris created by
 79		// direct string manipulation; all DocumentUris
 80		// received from the client pass through
 81		// ParseRequestURI, which ensures validity.
 82		return "", fmt.Errorf("invalid URI %q: %w", uri, err)
 83	}
 84	return filepath.FromSlash(filename), nil
 85}
 86
 87// Dir returns the URI for the directory containing the receiver.
 88func (uri DocumentURI) Dir() (DocumentURI, error) {
 89	// XXX: Legacy comment:
 90	// This function could be more efficiently implemented by avoiding any call
 91	// to Path(), but at least consolidates URI manipulation.
 92
 93	path, err := uri.DirPath()
 94	if err != nil {
 95		return "", fmt.Errorf("invalid URI %q: %w", uri, err)
 96	}
 97
 98	return URIFromPath(path), nil
 99}
100
101// DirPath returns the file path to the directory containing this URI, which
102// must be a file URI.
103func (uri DocumentURI) DirPath() (string, error) {
104	path, err := uri.Path()
105	if err != nil {
106		return "", err
107	}
108	return filepath.Dir(path), nil
109}
110
111func filename(uri DocumentURI) (string, error) {
112	if uri == "" {
113		return "", nil
114	}
115
116	// This conservative check for the common case
117	// of a simple non-empty absolute POSIX filename
118	// avoids the allocation of a net.URL.
119	if strings.HasPrefix(string(uri), "file:///") {
120		rest := string(uri)[len("file://"):] // leave one slash
121		for i := range len(rest) {
122			b := rest[i]
123			// Reject these cases:
124			if b < ' ' || b == 0x7f || // control character
125				b == '%' || b == '+' || // URI escape
126				b == ':' || // Windows drive letter
127				b == '@' || b == '&' || b == '?' { // authority or query
128				goto slow
129			}
130		}
131		return rest, nil
132	}
133slow:
134
135	u, err := url.ParseRequestURI(string(uri))
136	if err != nil {
137		return "", err
138	}
139	if u.Scheme != fileScheme {
140		return "", fmt.Errorf("only file URIs are supported, got %q from %q", u.Scheme, uri)
141	}
142	// If the URI is a Windows URI, we trim the leading "/" and uppercase
143	// the drive letter, which will never be case sensitive.
144	if isWindowsDriveURIPath(u.Path) {
145		u.Path = strings.ToUpper(string(u.Path[1])) + u.Path[2:]
146	}
147
148	return u.Path, nil
149}
150
151// ParseDocumentURI interprets a string as a DocumentUri, applying VS
152// Code workarounds; see [DocumentURI.UnmarshalText] for details.
153func ParseDocumentURI(s string) (DocumentURI, error) {
154	if s == "" {
155		return "", nil
156	}
157
158	if !strings.HasPrefix(s, "file://") {
159		return "", fmt.Errorf("DocumentUri scheme is not 'file': %s", s)
160	}
161
162	// VS Code sends URLs with only two slashes,
163	// which are invalid. golang/go#39789.
164	if !strings.HasPrefix(s, "file:///") {
165		s = "file:///" + s[len("file://"):]
166	}
167
168	// Even though the input is a URI, it may not be in canonical form. VS Code
169	// in particular over-escapes :, @, etc. Unescape and re-encode to canonicalize.
170	path, err := url.PathUnescape(s[len("file://"):])
171	if err != nil {
172		return "", err
173	}
174
175	// File URIs from Windows may have lowercase drive letters.
176	// Since drive letters are guaranteed to be case insensitive,
177	// we change them to uppercase to remain consistent.
178	// For example, file:///c:/x/y/z becomes file:///C:/x/y/z.
179	if isWindowsDriveURIPath(path) {
180		path = path[:1] + strings.ToUpper(string(path[1])) + path[2:]
181	}
182	u := url.URL{Scheme: fileScheme, Path: path}
183	return DocumentURI(u.String()), nil
184}
185
186// URIFromPath returns DocumentUri for the supplied file path.
187// Given "", it returns "".
188func URIFromPath(path string) DocumentURI {
189	if path == "" {
190		return ""
191	}
192	if !isWindowsDrivePath(path) {
193		if abs, err := filepath.Abs(path); err == nil {
194			path = abs
195		}
196	}
197	// Check the file path again, in case it became absolute.
198	if isWindowsDrivePath(path) {
199		path = "/" + strings.ToUpper(string(path[0])) + path[1:]
200	}
201	path = filepath.ToSlash(path)
202	u := url.URL{
203		Scheme: fileScheme,
204		Path:   path,
205	}
206	return DocumentURI(u.String())
207}
208
// fileScheme is the URL scheme that filename requires of the URIs it
// parses, and that ParseDocumentURI and URIFromPath give to the URIs
// they build.
const fileScheme = "file"
210
// isWindowsDrivePath reports whether the file path is of the form used
// by Windows: an ASCII drive letter, followed by a ":", followed by at
// least one more byte. For example: C:/x/y/z.
//
// A bare drive such as "C:" is too short and is not matched.
func isWindowsDrivePath(path string) bool {
	if len(path) < 3 {
		return false
	}
	// Drive letters are ASCII only. Without the MaxASCII guard, a raw
	// byte such as 0xE9 would be widened to the rune 'é', which
	// unicode.IsLetter accepts.
	drive := path[0]
	return drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && path[1] == ':'
}
220
// isWindowsDriveURIPath reports whether the URI path is of the format
// used by Windows URIs. The net/url package does not specially handle
// Windows paths (see golang/go#6027), so we check if the URI path has a
// drive prefix (e.g. "/C:") followed by at least one more byte.
//
// Only ASCII letters count as drive letters.
func isWindowsDriveURIPath(uri string) bool {
	if len(uri) < 4 {
		return false
	}
	// The path may come from unescaping client input, so uri[1] can be
	// any byte. Without the MaxASCII guard, a byte such as 0xE9 (from
	// "%E9") would be widened to the rune 'é', accepted as a letter, and
	// then rewritten by callers that upper-case the drive letter.
	drive := uri[1]
	return uri[0] == '/' && drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && uri[2] == ':'
}