fix(lsp): replace sticky unavailable cache with retry backoff (#2498)

iceymoss created

Track unavailable LSP servers per Manager instance instead of in a package-level map, and re-check a missing server command after a 30-second backoff, so that temporary environment changes do not permanently block LSP startup.

Change summary

internal/lsp/manager.go      | 52 +++++++++++++++++++++++++++----------
internal/lsp/manager_test.go | 35 +++++++++++++++++++++++++
2 files changed, 73 insertions(+), 14 deletions(-)

Detailed changes

internal/lsp/manager.go 🔗

@@ -21,14 +21,16 @@ import (
 	"github.com/sourcegraph/jsonrpc2"
 )
 
-var unavailable = csync.NewMap[string, struct{}]()
+const unavailableRetryDelay = 30 * time.Second
 
 // Manager handles lazy initialization of LSP clients based on file types.
 type Manager struct {
-	clients  *csync.Map[string, *Client]
-	cfg      *config.ConfigStore
-	manager  *powernapconfig.Manager
-	callback func(name string, client *Client)
+	clients     *csync.Map[string, *Client]
+	unavailable *csync.Map[string, time.Time]
+	cfg         *config.ConfigStore
+	manager     *powernapconfig.Manager
+	callback    func(name string, client *Client)
+	now         func() time.Time
 }
 
 // NewManager creates a new LSP manager service.
@@ -59,10 +61,12 @@ func NewManager(cfg *config.ConfigStore) *Manager {
 	}
 
 	return &Manager{
-		clients:  csync.NewMap[string, *Client](),
-		cfg:      cfg,
-		manager:  manager,
-		callback: func(string, *Client) {}, // default no-op callback
+		clients:     csync.NewMap[string, *Client](),
+		unavailable: csync.NewMap[string, time.Time](),
+		cfg:         cfg,
+		manager:     manager,
+		callback:    func(string, *Client) {}, // default no-op callback
+		now:         time.Now,
 	}
 }
 
@@ -155,10 +159,6 @@ func (s *Manager) startServer(ctx context.Context, name, filepath string, server
 		return
 	}
 
-	if _, exists := unavailable.Get(name); exists {
-		return
-	}
-
 	if client, ok := s.clients.Get(name); ok {
 		switch client.GetServerState() {
 		case StateReady, StateStarting, StateDisabled:
@@ -169,11 +169,15 @@ func (s *Manager) startServer(ctx context.Context, name, filepath string, server
 	}
 
 	if !isUserConfigured {
+		if s.recentlyUnavailable(name) {
+			return
+		}
 		if _, err := exec.LookPath(server.Command); err != nil {
 			slog.Debug("LSP server not installed, skipping", "name", name, "command", server.Command)
-			unavailable.Set(name, struct{}{})
+			s.markUnavailable(name)
 			return
 		}
+		s.clearUnavailable(name)
 		if skipAutoStartCommands[server.Command] {
 			slog.Debug("LSP command too generic for auto-start, skipping", "name", name, "command", server.Command)
 			return
@@ -255,6 +259,26 @@ func (s *Manager) isUserConfigured(name string) bool {
 	return ok && !cfg.Disabled
 }
 
+func (s *Manager) recentlyUnavailable(name string) bool {
+	lastUnavailableAt, exists := s.unavailable.Get(name)
+	if !exists {
+		return false
+	}
+	if s.now().Sub(lastUnavailableAt) < unavailableRetryDelay {
+		return true
+	}
+	s.unavailable.Del(name)
+	return false
+}
+
+func (s *Manager) markUnavailable(name string) {
+	s.unavailable.Set(name, s.now())
+}
+
+func (s *Manager) clearUnavailable(name string) {
+	s.unavailable.Del(name)
+}
+
 func (s *Manager) buildConfig(name string, server *powernapconfig.ServerConfig) config.LSPConfig {
 	cfg := config.LSPConfig{
 		Command:     server.Command,

internal/lsp/manager_test.go 🔗

@@ -0,0 +1,35 @@
+package lsp
+
+import (
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/csync"
+	"github.com/stretchr/testify/require"
+)
+
+func TestUnavailableBackoff(t *testing.T) {
+	t.Parallel()
+
+	base := time.Date(2026, 3, 26, 0, 0, 0, 0, time.UTC)
+	now := base
+
+	manager := &Manager{
+		unavailable: csync.NewMap[string, time.Time](),
+		now:         func() time.Time { return now },
+	}
+
+	require.False(t, manager.recentlyUnavailable("gopls"))
+
+	manager.markUnavailable("gopls")
+	require.True(t, manager.recentlyUnavailable("gopls"))
+
+	now = now.Add(unavailableRetryDelay + time.Second)
+	require.False(t, manager.recentlyUnavailable("gopls"))
+	_, exists := manager.unavailable.Get("gopls")
+	require.False(t, exists)
+
+	manager.markUnavailable("gopls")
+	manager.clearUnavailable("gopls")
+	require.False(t, manager.recentlyUnavailable("gopls"))
+}