1// Copyright 2023 The Go Authors. All rights reserved.
2// Use of this source code is governed by a BSD-style
3// license that can be found in the LICENSE file.
4
5package protocol
6
7// This file declares URI, DocumentUri, and its methods.
8//
9// For the LSP definition of these types, see
10// https://microsoft.github.io/language-server-protocol/specifications/lsp/3.17/specification/#uri
11
12import (
13 "fmt"
14 "net/url"
15 "path/filepath"
16 "strings"
17 "unicode"
18)
19
// A DocumentUri is the URI of a client editor document.
//
// According to the LSP specification:
//
//	Care should be taken to handle encoding in URIs. For
//	example, some clients (such as VS Code) may encode colons
//	in drive letters while others do not. The URIs below are
//	both valid, but clients and servers should be consistent
//	with the form they use themselves to ensure the other party
//	doesn't interpret them as distinct URIs. Clients and
//	servers should not assume that each other are encoding the
//	same way (for example a client encoding colons in drive
//	letters cannot assume server responses will have encoded
//	colons). The same applies to casing of drive letters - one
//	party should not assume the other party will return paths
//	with drive letters cased the same as it.
//
//	file:///c:/project/readme.md
//	file:///C%3A/project/readme.md
//
// Normalization of these variations is done during JSON unmarshalling;
// see [DocumentUri.UnmarshalText] for details.
type DocumentUri string
43
// A URI is an arbitrary URL (e.g. https), not necessarily a file.
//
// It is a plain alias for string and has no methods of its own.
type URI = string
46
47// UnmarshalText implements decoding of DocumentUri values.
48//
49// In particular, it implements a systematic correction of various odd
50// features of the definition of DocumentUri in the LSP spec that
51// appear to be workarounds for bugs in VS Code. For example, it may
52// URI-encode the URI itself, so that colon becomes %3A, and it may
53// send file://foo.go URIs that have two slashes (not three) and no
54// hostname.
55//
56// We use UnmarshalText, not UnmarshalJSON, because it is called even
57// for non-addressable values such as keys and values of map[K]V,
58// where there is no pointer of type *K or *V on which to call
59// UnmarshalJSON. (See Go issue #28189 for more detail.)
60//
61// Non-empty DocumentUris are valid "file"-scheme URIs.
62// The empty DocumentUri is valid.
63func (uri *DocumentUri) UnmarshalText(data []byte) (err error) {
64 *uri, err = ParseDocumentUri(string(data))
65 return
66}
67
68// Path returns the file path for the given URI.
69//
70// DocumentUri("").Path() returns the empty string.
71//
72// Path panics if called on a URI that is not a valid filename.
73func (uri DocumentUri) Path() string {
74 filename, err := filename(uri)
75 if err != nil {
76 // e.g. ParseRequestURI failed.
77 //
78 // This can only affect DocumentUris created by
79 // direct string manipulation; all DocumentUris
80 // received from the client pass through
81 // ParseRequestURI, which ensures validity.
82 panic(err)
83 }
84 return filepath.FromSlash(filename)
85}
86
87// Dir returns the URI for the directory containing the receiver.
88func (uri DocumentUri) Dir() DocumentUri {
89 // This function could be more efficiently implemented by avoiding any call
90 // to Path(), but at least consolidates URI manipulation.
91 return URIFromPath(uri.DirPath())
92}
93
94// DirPath returns the file path to the directory containing this URI, which
95// must be a file URI.
96func (uri DocumentUri) DirPath() string {
97 return filepath.Dir(uri.Path())
98}
99
100func filename(uri DocumentUri) (string, error) {
101 if uri == "" {
102 return "", nil
103 }
104
105 // This conservative check for the common case
106 // of a simple non-empty absolute POSIX filename
107 // avoids the allocation of a net.URL.
108 if strings.HasPrefix(string(uri), "file:///") {
109 rest := string(uri)[len("file://"):] // leave one slash
110 for i := range len(rest) {
111 b := rest[i]
112 // Reject these cases:
113 if b < ' ' || b == 0x7f || // control character
114 b == '%' || b == '+' || // URI escape
115 b == ':' || // Windows drive letter
116 b == '@' || b == '&' || b == '?' { // authority or query
117 goto slow
118 }
119 }
120 return rest, nil
121 }
122slow:
123
124 u, err := url.ParseRequestURI(string(uri))
125 if err != nil {
126 return "", err
127 }
128 if u.Scheme != fileScheme {
129 return "", fmt.Errorf("only file URIs are supported, got %q from %q", u.Scheme, uri)
130 }
131 // If the URI is a Windows URI, we trim the leading "/" and uppercase
132 // the drive letter, which will never be case sensitive.
133 if isWindowsDriveURIPath(u.Path) {
134 u.Path = strings.ToUpper(string(u.Path[1])) + u.Path[2:]
135 }
136
137 return u.Path, nil
138}
139
140// ParseDocumentUri interprets a string as a DocumentUri, applying VS
141// Code workarounds; see [DocumentUri.UnmarshalText] for details.
142func ParseDocumentUri(s string) (DocumentUri, error) {
143 if s == "" {
144 return "", nil
145 }
146
147 if !strings.HasPrefix(s, "file://") {
148 return "", fmt.Errorf("DocumentUri scheme is not 'file': %s", s)
149 }
150
151 // VS Code sends URLs with only two slashes,
152 // which are invalid. golang/go#39789.
153 if !strings.HasPrefix(s, "file:///") {
154 s = "file:///" + s[len("file://"):]
155 }
156
157 // Even though the input is a URI, it may not be in canonical form. VS Code
158 // in particular over-escapes :, @, etc. Unescape and re-encode to canonicalize.
159 path, err := url.PathUnescape(s[len("file://"):])
160 if err != nil {
161 return "", err
162 }
163
164 // File URIs from Windows may have lowercase drive letters.
165 // Since drive letters are guaranteed to be case insensitive,
166 // we change them to uppercase to remain consistent.
167 // For example, file:///c:/x/y/z becomes file:///C:/x/y/z.
168 if isWindowsDriveURIPath(path) {
169 path = path[:1] + strings.ToUpper(string(path[1])) + path[2:]
170 }
171 u := url.URL{Scheme: fileScheme, Path: path}
172 return DocumentUri(u.String()), nil
173}
174
175// URIFromPath returns DocumentUri for the supplied file path.
176// Given "", it returns "".
177func URIFromPath(path string) DocumentUri {
178 if path == "" {
179 return ""
180 }
181 if !isWindowsDrivePath(path) {
182 if abs, err := filepath.Abs(path); err == nil {
183 path = abs
184 }
185 }
186 // Check the file path again, in case it became absolute.
187 if isWindowsDrivePath(path) {
188 path = "/" + strings.ToUpper(string(path[0])) + path[1:]
189 }
190 path = filepath.ToSlash(path)
191 u := url.URL{
192 Scheme: fileScheme,
193 Path: path,
194 }
195 return DocumentUri(u.String())
196}
197
// fileScheme is the URI scheme used for every non-empty DocumentUri:
// it is the scheme written when building a URI and the one required
// when converting a URI back to a file name.
const fileScheme = "file"
199
// isWindowsDrivePath reports whether the file path is of the form
// used by Windows: it begins with an ASCII drive letter followed by
// a ":", and is at least three bytes long.
// For example: C:/x/y/z.
//
// Only ASCII letters are accepted as drive letters. The first byte is
// inspected on its own, so without this restriction a stray byte such
// as 0xE9 would be widened to the Latin-1 letter U+00E9 and wrongly
// treated as a drive letter; callers that then uppercase it would
// replace that byte with a multi-byte UTF-8 sequence.
func isWindowsDrivePath(path string) bool {
	if len(path) < 3 {
		return false
	}
	drive := path[0]
	return drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) && path[1] == ':'
}
209
// isWindowsDriveURIPath reports whether the URI path is of the format
// used by Windows URIs. The net/url package does not specially handle
// Windows paths (see golang/go#6027), so we check whether the URI
// path has a drive prefix (e.g. "/C:") and is at least four bytes
// long.
//
// Only ASCII letters are accepted as drive letters. The second byte
// is inspected on its own, so without this restriction a stray byte
// such as 0xE9 (e.g. from an unescaped "%E9") would be widened to the
// Latin-1 letter U+00E9 and wrongly treated as a drive letter;
// callers that then uppercase it would replace that byte with a
// multi-byte UTF-8 sequence.
func isWindowsDriveURIPath(uri string) bool {
	if len(uri) < 4 {
		return false
	}
	drive := uri[1]
	return uri[0] == '/' &&
		drive <= unicode.MaxASCII && unicode.IsLetter(rune(drive)) &&
		uri[2] == ':'
}