fix(lsp): mitigate stale diagnostics

Christian Rocha and Andrey Nering created

* fix(lsp): mitigate stale diagnostics

* fix(lsp): log warning when workspace change notification fails

💘 Generated with Crush

Assisted-by: GLM-5.1 via Crush <crush@charm.land>

* test(lsp): add unit tests for diagnostics settling behavior

💘 Generated with Crush

Assisted-by: GLM-5.1 via Crush <crush@charm.land>

* refactor(lsp): remove blocking diagnostic wait from post-edit notifications

💘 Generated with Crush

Assisted-by: GLM-5.1 via Crush <crush@charm.land>

* chore: add async diagnostic waits

* Update internal/agent/tools/diagnostics.go

Co-authored-by: Andrey Nering <andreynering@users.noreply.github.com>

* fix(lsp): stop timer leaks in WaitForDiagnostics

Replace time.After with time.NewTimer + defer Stop() to prevent
unreclaimable timer allocations on early returns.

💘 Generated with Crush

Assisted-by: GLM-5.1 via Crush <crush@charm.land>

---------

Co-authored-by: Andrey Nering <andreynering@users.noreply.github.com>

Change summary

internal/agent/tools/diagnostics.go |  25 ++++++
internal/lsp/client.go              | 107 ++++++++++++++++++++++++++++--
internal/lsp/client_test.go         |  96 +++++++++++++++++++++++++++
3 files changed, 219 insertions(+), 9 deletions(-)

Detailed changes

internal/agent/tools/diagnostics.go 🔗

@@ -87,25 +87,46 @@ func waitForLSPDiagnostics(
 
 // notifyLSPs notifies LSP servers that a file has changed and waits for
 // updated diagnostics. Use this after edit/multiedit operations.
+// When filepath is empty, refreshes all open files across all LSP clients
+// and sends a workspace-level change notification for full re-analysis.
 func notifyLSPs(
 	ctx context.Context,
 	manager *lsp.Manager,
 	filepath string,
 ) {
-	if filepath == "" || manager == nil {
+	if manager == nil {
+		return
+	}
+	if filepath == "" {
+		// No specific file — refresh all open files for all clients.
+		var wg sync.WaitGroup
+		for client := range manager.Clients().Seq() {
+			wg.Go(func() {
+				client.RefreshOpenFiles(ctx)
+				if err := client.NotifyWorkspaceChange(ctx); err != nil {
+					slog.WarnContext(ctx, "Failed to notify workspace change", "error", err)
+				}
+				client.WaitForDiagnostics(ctx, 5*time.Second)
+			})
+		}
+		wg.Wait()
 		return
 	}
 
 	manager.Start(ctx, filepath)
 
+	var wg sync.WaitGroup
 	for client := range manager.Clients().Seq() {
 		if !client.HandlesFile(filepath) {
 			continue
 		}
 		_ = client.OpenFileOnDemand(ctx, filepath)
 		_ = client.NotifyChange(ctx, filepath)
-		client.WaitForDiagnostics(ctx, 5*time.Second)
+		wg.Go(func() {
+			client.WaitForDiagnostics(ctx, 5*time.Second)
+		})
 	}
+	wg.Wait()
 }
 
 func getDiagnostics(filePath string, manager *lsp.Manager) string {

internal/lsp/client.go 🔗

@@ -530,23 +530,116 @@ func (c *Client) openKeyConfigFiles(ctx context.Context) {
 	}
 }
 
-// WaitForDiagnostics waits until diagnostics change or the timeout is reached.
-func (c *Client) WaitForDiagnostics(ctx context.Context, d time.Duration) {
+// NotifyWorkspaceChange sends a workspace-level file change notification to
+// trigger re-analysis of all files. This is useful when the overall project
+// state may have changed (e.g., after a project-wide refactoring) and
+// diagnostics for files not currently being edited may be stale.
+func (c *Client) NotifyWorkspaceChange(ctx context.Context) error {
+	if c == nil {
+		return nil
+	}
+	return c.client.NotifyDidChangeWatchedFiles(ctx, []protocol.FileEvent{
+		{URI: protocol.DocumentURI(protocol.URIFromPath(c.cwd)), Type: protocol.Changed},
+	})
+}
+
+// RefreshOpenFiles re-notifies the LSP server about all currently open files,
+// which triggers re-analysis and fresh diagnostics for the entire project.
+func (c *Client) RefreshOpenFiles(ctx context.Context) {
 	if c == nil {
 		return
 	}
-	ticker := time.NewTicker(200 * time.Millisecond)
+	for uri, info := range c.openFiles.Seq2() {
+		path, err := protocol.DocumentURI(uri).Path()
+		if err != nil {
+			slog.Warn("Failed to convert URI to path", "uri", uri, "error", err)
+			continue
+		}
+		content, err := os.ReadFile(path)
+		if err != nil {
+			slog.Warn("Failed to read file for refresh", "path", path, "error", err)
+			continue
+		}
+		info.Version++
+		changes := []protocol.TextDocumentContentChangeEvent{
+			{
+				Value: protocol.TextDocumentContentChangeWholeDocument{
+					Text: string(content),
+				},
+			},
+		}
+		if err := c.client.NotifyDidChangeTextDocument(ctx, uri, int(info.Version), changes); err != nil {
+			slog.Warn("Failed to notify file change", "uri", uri, "error", err)
+		}
+	}
+}
+
+// WaitForDiagnostics waits until diagnostics stop changing for a settling
+// period, indicating the LSP server has finished processing. If no
+// diagnostics change within firstChangeDuration, it returns early since the
+// server likely isn't going to republish.
+func (c *Client) WaitForDiagnostics(ctx context.Context, timeout time.Duration) {
+	if c == nil {
+		return
+	}
+
+	const (
+		firstChangeDuration = 1 * time.Second
+		settleDuration      = 300 * time.Millisecond
+	)
+
+	deadline := time.NewTimer(timeout)
+	defer deadline.Stop()
+	firstChangeTimer := time.NewTimer(min(timeout, firstChangeDuration))
+	defer firstChangeTimer.Stop()
+	previousVersion := c.diagnostics.Version()
+	ticker := time.NewTicker(100 * time.Millisecond)
 	defer ticker.Stop()
-	timeout := time.After(d)
-	pv := c.diagnostics.Version()
+
 	for {
 		select {
 		case <-ctx.Done():
 			return
-		case <-timeout:
+		case <-deadline.C:
+			return
+		case <-firstChangeTimer.C:
+			// No change arrived quickly — server isn't republishing.
 			return
 		case <-ticker.C:
-			if pv != c.diagnostics.Version() {
+			currentVersion := c.diagnostics.Version()
+			if currentVersion != previousVersion {
+				// Diagnostics changed — now wait for them to settle.
+				c.waitForDiagnosticsToSettle(ctx, deadline.C, settleDuration)
+				return
+			}
+		}
+	}
+}
+
+// waitForDiagnosticsToSettle waits until diagnostics version stays the same
+// for settleDuration, indicating the LSP server has finished publishing.
+func (c *Client) waitForDiagnosticsToSettle(ctx context.Context, deadline <-chan time.Time, settleDuration time.Duration) {
+	lastVersion := c.diagnostics.Version()
+	settleTicker := time.NewTicker(50 * time.Millisecond)
+	defer settleTicker.Stop()
+
+	// Track how long the version has been stable.
+	stableStart := time.Now()
+
+	for {
+		select {
+		case <-ctx.Done():
+			return
+		case <-deadline:
+			return
+		case <-settleTicker.C:
+			currentVersion := c.diagnostics.Version()
+			if currentVersion != lastVersion {
+				// New change detected — reset the stable timer.
+				lastVersion = currentVersion
+				stableStart = time.Now()
+			} else if time.Since(stableStart) >= settleDuration {
+				// Diagnostics have been stable for the settle duration.
 				return
 			}
 		}

internal/lsp/client_test.go 🔗

@@ -2,11 +2,14 @@ package lsp
 
 import (
 	"context"
+	"fmt"
 	"testing"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/config"
+	"github.com/charmbracelet/crush/internal/csync"
 	"github.com/charmbracelet/crush/internal/env"
+	"github.com/charmbracelet/x/powernap/pkg/lsp/protocol"
 	"github.com/stretchr/testify/require"
 )
 
@@ -70,3 +73,96 @@ func TestNilClient(t *testing.T) {
 	require.Nil(t, c.NotifyChange(context.Background(), "/some/file.go"))
 	c.WaitForDiagnostics(context.Background(), time.Second)
 }
+
+func newTestClient() *Client {
+	c := &Client{
+		name:        "test",
+		diagnostics: csync.NewVersionedMap[protocol.DocumentURI, []protocol.Diagnostic](),
+		openFiles:   csync.NewMap[string, *OpenFileInfo](),
+	}
+	c.serverState.Store(StateStopped)
+	return c
+}
+
+func TestWaitForDiagnostics_NoChange(t *testing.T) {
+	t.Parallel()
+
+	c := newTestClient()
+	start := time.Now()
+	c.WaitForDiagnostics(t.Context(), 5*time.Second)
+	elapsed := time.Since(start)
+
+	// Should return early via firstChangeDeadline (~1s), not the full timeout.
+	require.Less(t, elapsed, 2*time.Second, "should return early when no diagnostics change")
+}
+
+func TestWaitForDiagnostics_ImmediateChange(t *testing.T) {
+	t.Parallel()
+
+	c := newTestClient()
+
+	go func() {
+		time.Sleep(100 * time.Millisecond)
+		c.diagnostics.Set(protocol.DocumentURI("file:///test.go"), nil)
+	}()
+
+	start := time.Now()
+	c.WaitForDiagnostics(t.Context(), 5*time.Second)
+	elapsed := time.Since(start)
+
+	// Should detect the change and then settle (~300ms settle + overhead).
+	require.Less(t, elapsed, 2*time.Second, "should return after settling, not full timeout")
+	require.Greater(t, elapsed, 200*time.Millisecond, "should wait for settle duration")
+}
+
+func TestWaitForDiagnostics_RepeatedChanges(t *testing.T) {
+	t.Parallel()
+
+	c := newTestClient()
+
+	// Simulate an LSP server that publishes diagnostics in bursts.
+	go func() {
+		for i := range 5 {
+			time.Sleep(50 * time.Millisecond)
+			c.diagnostics.Set(protocol.DocumentURI("file:///test.go"), []protocol.Diagnostic{
+				{Message: fmt.Sprintf("diag-%d", i)},
+			})
+		}
+	}()
+
+	start := time.Now()
+	c.WaitForDiagnostics(t.Context(), 5*time.Second)
+	elapsed := time.Since(start)
+
+	// Should wait for diagnostics to settle after the burst finishes.
+	// Burst lasts ~250ms, then 300ms settle window, so total ~550ms+.
+	require.Less(t, elapsed, 2*time.Second, "should return after settling, not full timeout")
+	require.Greater(t, elapsed, 400*time.Millisecond, "should wait for all changes to settle")
+}
+
+func TestWaitForDiagnostics_ContextCancellation(t *testing.T) {
+	t.Parallel()
+
+	c := newTestClient()
+	ctx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+
+	go func() {
+		time.Sleep(200 * time.Millisecond)
+		cancel()
+	}()
+
+	start := time.Now()
+	c.WaitForDiagnostics(ctx, 5*time.Second)
+	elapsed := time.Since(start)
+
+	require.Less(t, elapsed, 1*time.Second, "should return shortly after context cancellation")
+}
+
+func TestWaitForDiagnostics_NilClient(t *testing.T) {
+	t.Parallel()
+
+	var c *Client
+	// Should not panic.
+	c.WaitForDiagnostics(context.Background(), time.Second)
+}