1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package protocol
6
7// This file declares URI, DocumentUri, and its methods.
8//
9// For the LSP definition of these types, see
10// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
11
12import (
13 "fmt"
14 "net/url"
15 "path/filepath"
16 "strings"
17 "unicode"
18)
19
// A DocumentURI is the URI of a client editor document.
//
// According to the LSP specification:
//
//	Care should be taken to handle encoding in URIs. For
//	example, some clients (such as VS Code) may encode colons
//	in drive letters while others do not. The URIs below are
//	both valid, but clients and servers should be consistent
//	with the form they use themselves to ensure the other party
//	doesn't interpret them as distinct URIs. Clients and
//	servers should not assume that each other are encoding the
//	same way (for example a client encoding colons in drive
//	letters cannot assume server responses will have encoded
//	colons). The same applies to casing of drive letters - one
//	party should not assume the other party will return paths
//	with drive letters cased the same as it.
//
//	file:///c:/project/readme.md
//	file:///C%3A/project/readme.md
//
// To keep such spellings from being treated as distinct documents,
// incoming values are rewritten to a single canonical form during
// JSON unmarshalling; see [DocumentURI.UnmarshalText] for details.
type DocumentURI string
43
// A URI is an arbitrary URL (e.g. https), not necessarily a file.
//
// It is an alias for string, so values carry no validation or
// normalization of their own.
type URI = string
46
47// UnmarshalText implements decoding of DocumentUri values.
48//
49// In particular, it implements a systematic correction of various odd
50// features of the definition of DocumentUri in the LSP spec that
51// appear to be workarounds for bugs in VS Code. For example, it may
52// URI-encode the URI itself, so that colon becomes %3A, and it may
53// send file://foo.go URIs that have two slashes (not three) and no
54// hostname.
55//
56// We use UnmarshalText, not UnmarshalJSON, because it is called even
57// for non-addressable values such as keys and values of map[K]V,
58// where there is no pointer of type *K or *V on which to call
59// UnmarshalJSON. (See Go issue #28189 for more detail.)
60//
61// Non-empty DocumentUris are valid "file"-scheme URIs.
62// The empty DocumentUri is valid.
63func (uri *DocumentURI) UnmarshalText(data []byte) (err error) {
64 *uri, err = ParseDocumentURI(string(data))
65 return
66}
67
68// Path returns the file path for the given URI.
69//
70// DocumentUri("").Path() returns the empty string.
71//
72// Path panics if called on a URI that is not a valid filename.
73func (uri DocumentURI) Path() (string, error) {
74 filename, err := filename(uri)
75 if err != nil {
76 // e.g. ParseRequestURI failed.
77 //
78 // This can only affect DocumentUris created by
79 // direct string manipulation; all DocumentUris
80 // received from the client pass through
81 // ParseRequestURI, which ensures validity.
82 return "", fmt.Errorf("invalid URI %q: %w", uri, err)
83 }
84 return filepath.FromSlash(filename), nil
85}
86
87// Dir returns the URI for the directory containing the receiver.
88func (uri DocumentURI) Dir() (DocumentURI, error) {
89 // XXX: Legacy comment:
90 // This function could be more efficiently implemented by avoiding any call
91 // to Path(), but at least consolidates URI manipulation.
92
93 path, err := uri.DirPath()
94 if err != nil {
95 return "", fmt.Errorf("invalid URI %q: %w", uri, err)
96 }
97
98 return URIFromPath(path), nil
99}
100
101// DirPath returns the file path to the directory containing this URI, which
102// must be a file URI.
103func (uri DocumentURI) DirPath() (string, error) {
104 path, err := uri.Path()
105 if err != nil {
106 return "", err
107 }
108 return filepath.Dir(path), nil
109}
110
111func filename(uri DocumentURI) (string, error) {
112 if uri == "" {
113 return "", nil
114 }
115
116 // This conservative check for the common case
117 // of a simple non-empty absolute POSIX filename
118 // avoids the allocation of a net.URL.
119 if strings.HasPrefix(string(uri), "file:///") {
120 rest := string(uri)[len("file://"):] // leave one slash
121 for i := range len(rest) {
122 b := rest[i]
123 // Reject these cases:
124 if b < ' ' || b == 0x7f || // control character
125 b == '%' || b == '+' || // URI escape
126 b == ':' || // Windows drive letter
127 b == '@' || b == '&' || b == '?' { // authority or query
128 goto slow
129 }
130 }
131 return rest, nil
132 }
133slow:
134
135 u, err := url.ParseRequestURI(string(uri))
136 if err != nil {
137 return "", err
138 }
139 if u.Scheme != fileScheme {
140 return "", fmt.Errorf("only file URIs are supported, got %q from %q", u.Scheme, uri)
141 }
142 // If the URI is a Windows URI, we trim the leading "/" and uppercase
143 // the drive letter, which will never be case sensitive.
144 if isWindowsDriveURIPath(u.Path) {
145 u.Path = strings.ToUpper(string(u.Path[1])) + u.Path[2:]
146 }
147
148 return u.Path, nil
149}
150
151// ParseDocumentURI interprets a string as a DocumentUri, applying VS
152// Code workarounds; see [DocumentURI.UnmarshalText] for details.
153func ParseDocumentURI(s string) (DocumentURI, error) {
154 if s == "" {
155 return "", nil
156 }
157
158 if !strings.HasPrefix(s, "file://") {
159 return "", fmt.Errorf("DocumentUri scheme is not 'file': %s", s)
160 }
161
162 // VS Code sends URLs with only two slashes,
163 // which are invalid. golang/go#39789.
164 if !strings.HasPrefix(s, "file:///") {
165 s = "file:///" + s[len("file://"):]
166 }
167
168 // Even though the input is a URI, it may not be in canonical form. VS Code
169 // in particular over-escapes :, @, etc. Unescape and re-encode to canonicalize.
170 path, err := url.PathUnescape(s[len("file://"):])
171 if err != nil {
172 return "", err
173 }
174
175 // File URIs from Windows may have lowercase drive letters.
176 // Since drive letters are guaranteed to be case insensitive,
177 // we change them to uppercase to remain consistent.
178 // For example, file:///c:/x/y/z becomes file:///C:/x/y/z.
179 if isWindowsDriveURIPath(path) {
180 path = path[:1] + strings.ToUpper(string(path[1])) + path[2:]
181 }
182 u := url.URL{Scheme: fileScheme, Path: path}
183 return DocumentURI(u.String()), nil
184}
185
186// URIFromPath returns DocumentUri for the supplied file path.
187// Given "", it returns "".
188func URIFromPath(path string) DocumentURI {
189 if path == "" {
190 return ""
191 }
192 if !isWindowsDrivePath(path) {
193 if abs, err := filepath.Abs(path); err == nil {
194 path = abs
195 }
196 }
197 // Check the file path again, in case it became absolute.
198 if isWindowsDrivePath(path) {
199 path = "/" + strings.ToUpper(string(path[0])) + path[1:]
200 }
201 path = filepath.ToSlash(path)
202 u := url.URL{
203 Scheme: fileScheme,
204 Path: path,
205 }
206 return DocumentURI(u.String())
207}
208
// fileScheme is the URI scheme used when building a DocumentURI and
// the only scheme accepted when converting one back to a file path.
const fileScheme = "file"
210
// isWindowsDrivePath reports whether the file path is of the form used
// by Windows, i.e. whether it begins with an ASCII drive letter followed
// by a ":". For example: C:/x/y/z.
//
// Paths shorter than three bytes are never treated as drive paths, so a
// bare "C:" yields false.
func isWindowsDrivePath(path string) bool {
	if len(path) < 3 {
		return false
	}
	// Drive letters are ASCII. The range check is required because
	// path[0] is a single byte: widening a byte >= 0x80 to a rune yields
	// a Latin-1 code point such as 'é', which unicode.IsLetter accepts,
	// and callers would then rewrite that byte and corrupt the path.
	drive := path[0]
	return drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && path[1] == ':'
}
220
// isWindowsDriveURIPath reports whether the URI path is of the format
// used by Windows URIs. The net/url package does not specially handle
// Windows paths (see golang/go#6027), so this checks for a drive prefix:
// a "/", an ASCII letter, and a ":" (e.g. "/C:/x").
//
// Paths shorter than four bytes are never treated as drive paths, so a
// bare "/C:" yields false.
func isWindowsDriveURIPath(uri string) bool {
	if len(uri) < 4 {
		return false
	}
	// Drive letters are ASCII. The range check is required because
	// uri[1] is a single byte: widening a byte >= 0x80 (which can arrive
	// as a percent-escape such as %E9) to a rune yields a Latin-1 code
	// point that unicode.IsLetter accepts, and callers would then
	// rewrite that byte and corrupt the path.
	drive := uri[1]
	return uri[0] == '/' && drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && uri[2] == ':'
}