fix: improving shutdown (#2175)

Carlos Alexandro Becker and Andrey Nering created

* test: use t.Context() and synctest

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* fix: passing down context to all shutdown funcs

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* perf(lsp): kill all clients on shutdown

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* fix: exit posthog earlier

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* test: fix dirs test

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* test: fix projects test

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* test: fix race

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* Update internal/lsp/manager.go

Co-authored-by: Andrey Nering <andreynering@users.noreply.github.com>

* fix: cleanup

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

* test: race

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>

---------

Signed-off-by: Carlos Alexandro Becker <caarlos0@users.noreply.github.com>
Co-authored-by: Andrey Nering <andreynering@users.noreply.github.com>

Change summary

go.mod                             |  2 +-
go.sum                             |  4 ++--
internal/agent/tools/mcp/init.go   |  5 +----
internal/app/app.go                | 22 ++++++++++++++++------
internal/cmd/dirs_test.go          |  2 ++
internal/cmd/root.go               |  3 ---
internal/lsp/client.go             |  5 ++++-
internal/lsp/manager.go            | 22 ++++++++++++++++++++++
internal/projects/projects_test.go |  6 ++++++
internal/shell/background_test.go  | 25 ++++++++++++++-----------
10 files changed, 68 insertions(+), 28 deletions(-)

Detailed changes

go.mod 🔗

@@ -31,7 +31,7 @@ require (
 	github.com/charmbracelet/x/exp/ordered v0.1.0
 	github.com/charmbracelet/x/exp/slice v0.0.0-20251201173703-9f73bfd934ff
 	github.com/charmbracelet/x/exp/strings v0.1.0
-	github.com/charmbracelet/x/powernap v0.0.0-20260127155452-b72a9a918687
+	github.com/charmbracelet/x/powernap v0.0.0-20260209132835-6b065b8ba62c
 	github.com/charmbracelet/x/term v0.2.2
 	github.com/clipperhouse/displaywidth v0.10.0
 	github.com/clipperhouse/uax29/v2 v2.6.0

go.sum 🔗

@@ -122,8 +122,8 @@ github.com/charmbracelet/x/exp/strings v0.1.0 h1:i69S2XI7uG1u4NLGeJPSYU++Nmjvpo9
 github.com/charmbracelet/x/exp/strings v0.1.0/go.mod h1:/ehtMPNh9K4odGFkqYJKpIYyePhdp1hLBRvyY4bWkH8=
 github.com/charmbracelet/x/json v0.2.0 h1:DqB+ZGx2h+Z+1s98HOuOyli+i97wsFQIxP2ZQANTPrQ=
 github.com/charmbracelet/x/json v0.2.0/go.mod h1:opFIflx2YgXgi49xVUu8gEQ21teFAxyMwvOiZhIvWNM=
-github.com/charmbracelet/x/powernap v0.0.0-20260127155452-b72a9a918687 h1:h1XMgTkpBt9kEJ+9DkARNBXEgaigUQ0cI2Bot7Awnt8=
-github.com/charmbracelet/x/powernap v0.0.0-20260127155452-b72a9a918687/go.mod h1:cmdl5zlP5mR8TF2Y68UKc7hdGUDiSJ2+4hk0h04Hsx4=
+github.com/charmbracelet/x/powernap v0.0.0-20260209132835-6b065b8ba62c h1:6E+Y7WQ6Rnw+FmeXoRBtyCBkPcXS0hSMuws6QBr+nyQ=
+github.com/charmbracelet/x/powernap v0.0.0-20260209132835-6b065b8ba62c/go.mod h1:cmdl5zlP5mR8TF2Y68UKc7hdGUDiSJ2+4hk0h04Hsx4=
 github.com/charmbracelet/x/term v0.2.2 h1:xVRT/S2ZcKdhhOuSP4t5cLi5o+JxklsoEObBSgfgZRk=
 github.com/charmbracelet/x/term v0.2.2/go.mod h1:kF8CY5RddLWrsgVwpw4kAa6TESp6EB5y3uxGLeCqzAI=
 github.com/charmbracelet/x/termios v0.1.1 h1:o3Q2bT8eqzGnGPOYheoYS8eEleT5ZVNYNy8JawjaNZY=

internal/agent/tools/mcp/init.go 🔗

@@ -123,10 +123,7 @@ func GetState(name string) (ClientInfo, bool) {
 }
 
 // Close closes all MCP clients. This should be called during application shutdown.
-func Close() error {
-	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
-	defer cancel()
-
+func Close(ctx context.Context) error {
 	var wg sync.WaitGroup
 	for name, session := range sessions.Seq2() {
 		wg.Go(func() {

internal/app/app.go 🔗

@@ -22,6 +22,7 @@ import (
 	"github.com/charmbracelet/crush/internal/agent/tools/mcp"
 	"github.com/charmbracelet/crush/internal/config"
 	"github.com/charmbracelet/crush/internal/db"
+	"github.com/charmbracelet/crush/internal/event"
 	"github.com/charmbracelet/crush/internal/filetracker"
 	"github.com/charmbracelet/crush/internal/format"
 	"github.com/charmbracelet/crush/internal/history"
@@ -68,7 +69,7 @@ type App struct {
 
 	// global context and cleanup functions
 	globalCtx    context.Context
-	cleanupFuncs []func() error
+	cleanupFuncs []func(context.Context) error
 }
 
 // New initializes a new application instance.
@@ -108,7 +109,11 @@ func New(ctx context.Context, conn *sql.DB, cfg *config.Config) (*App, error) {
 	go mcp.Initialize(ctx, app.Permissions, cfg)
 
 	// cleanup database upon app shutdown
-	app.cleanupFuncs = append(app.cleanupFuncs, conn.Close, mcp.Close)
+	app.cleanupFuncs = append(
+		app.cleanupFuncs,
+		func(context.Context) error { return conn.Close() },
+		mcp.Close,
+	)
 
 	// TODO: remove the concept of agent config, most likely.
 	if !cfg.IsConfigured() {
@@ -416,7 +421,7 @@ func (app *App) setupEvents() {
 	setupSubscriber(ctx, app.serviceEventsWG, "history", app.History.Subscribe, app.events)
 	setupSubscriber(ctx, app.serviceEventsWG, "mcp", mcp.SubscribeEvents, app.events)
 	setupSubscriber(ctx, app.serviceEventsWG, "lsp", SubscribeLSPEvents, app.events)
-	cleanupFunc := func() error {
+	cleanupFunc := func(context.Context) error {
 		cancel()
 		app.serviceEventsWG.Wait()
 		return nil
@@ -503,7 +508,7 @@ func (app *App) Subscribe(program *tea.Program) {
 
 	app.tuiWG.Add(1)
 	tuiCtx, tuiCancel := context.WithCancel(app.globalCtx)
-	app.cleanupFuncs = append(app.cleanupFuncs, func() error {
+	app.cleanupFuncs = append(app.cleanupFuncs, func(context.Context) error {
 		slog.Debug("Cancelling TUI message handler")
 		tuiCancel()
 		app.tuiWG.Wait()
@@ -544,6 +549,11 @@ func (app *App) Shutdown() {
 	shutdownCtx, cancel := context.WithTimeout(app.globalCtx, 5*time.Second)
 	defer cancel()
 
+	// Send exit event
+	wg.Go(func() {
+		event.AppExited()
+	})
+
 	// Kill all background shells.
 	wg.Go(func() {
 		shell.GetBackgroundShellManager().KillAll(shutdownCtx)
@@ -551,14 +561,14 @@ func (app *App) Shutdown() {
 
 	// Shutdown all LSP clients.
 	wg.Go(func() {
-		app.LSPManager.StopAll(shutdownCtx)
+		app.LSPManager.KillAll(shutdownCtx)
 	})
 
 	// Call all cleanup functions.
 	for _, cleanup := range app.cleanupFuncs {
 		if cleanup != nil {
 			wg.Go(func() {
-				if err := cleanup(); err != nil {
+				if err := cleanup(shutdownCtx); err != nil {
 					slog.Error("Failed to cleanup app properly on shutdown", "error", err)
 				}
 			})

internal/cmd/dirs_test.go 🔗

@@ -12,6 +12,8 @@ import (
 func init() {
 	os.Setenv("XDG_CONFIG_HOME", "/tmp/fakeconfig")
 	os.Setenv("XDG_DATA_HOME", "/tmp/fakedata")
+	os.Unsetenv("CRUSH_GLOBAL_CONFIG")
+	os.Unsetenv("CRUSH_GLOBAL_DATA")
 }
 
 func TestDirs(t *testing.T) {

internal/cmd/root.go 🔗

@@ -106,9 +106,6 @@ crush -y
 		}
 		return nil
 	},
-	PostRun: func(cmd *cobra.Command, args []string) {
-		event.AppExited()
-	},
 }
 
 var heartbit = lipgloss.NewStyle().Foreground(charmtone.Dolly).SetString(`

internal/lsp/client.go 🔗

@@ -117,7 +117,10 @@ func (c *Client) Initialize(ctx context.Context, workspaceDir string) (*protocol
 	return result, nil
 }
 
-// Close closes the LSP client.
+// Kill kills the client without doing anything else.
+func (c *Client) Kill() { c.client.Kill() }
+
+// Close closes all open files in the client, then the client.
 func (c *Client) Close(ctx context.Context) error {
 	c.CloseAllFiles(ctx)
 

internal/lsp/manager.go 🔗

@@ -255,6 +255,28 @@ func handles(server *powernapconfig.ServerConfig, filePath, workDir string) bool
 		hasRootMarkers(workDir, server.RootMarkers)
 }
 
+// KillAll force-kills all the LSP clients.
+//
+// This is generally faster than [Manager.StopAll] because it doesn't wait for
+// the server to exit gracefully, but it can lead to data loss if the server is
+// in the middle of writing something.
+// Generally it doesn't matter when shutting down Crush, though.
+func (s *Manager) KillAll(context.Context) {
+	s.mu.Lock()
+	defer s.mu.Unlock()
+
+	var wg sync.WaitGroup
+	for name, client := range s.clients.Seq2() {
+		wg.Go(func() {
+			defer func() { s.callback(name, client) }()
+			client.client.Kill()
+			client.SetServerState(StateStopped)
+			slog.Debug("Killed LSP client", "name", name)
+		})
+	}
+	wg.Wait()
+}
+
 // StopAll stops all running LSP clients and clears the client map.
 func (s *Manager) StopAll(ctx context.Context) {
 	s.mu.Lock()

internal/projects/projects_test.go 🔗

@@ -12,6 +12,7 @@ func TestRegisterAndList(t *testing.T) {
 
 	// Override the projects file path for testing
 	t.Setenv("XDG_DATA_HOME", tmpDir)
+	t.Setenv("CRUSH_GLOBAL_DATA", filepath.Join(tmpDir, "crush"))
 
 	// Test registering a project
 	err := Register("/home/user/project1", "/home/user/project1/.crush")
@@ -61,6 +62,7 @@ func TestRegisterAndList(t *testing.T) {
 func TestRegisterUpdatesExisting(t *testing.T) {
 	tmpDir := t.TempDir()
 	t.Setenv("XDG_DATA_HOME", tmpDir)
+	t.Setenv("CRUSH_GLOBAL_DATA", filepath.Join(tmpDir, "crush"))
 
 	// Register a project
 	err := Register("/home/user/project1", "/home/user/project1/.crush")
@@ -97,6 +99,7 @@ func TestRegisterUpdatesExisting(t *testing.T) {
 func TestLoadEmptyFile(t *testing.T) {
 	tmpDir := t.TempDir()
 	t.Setenv("XDG_DATA_HOME", tmpDir)
+	t.Setenv("CRUSH_GLOBAL_DATA", filepath.Join(tmpDir, "crush"))
 
 	// List before any projects exist
 	projects, err := List()
@@ -112,6 +115,7 @@ func TestLoadEmptyFile(t *testing.T) {
 func TestProjectsFilePath(t *testing.T) {
 	tmpDir := t.TempDir()
 	t.Setenv("XDG_DATA_HOME", tmpDir)
+	t.Setenv("CRUSH_GLOBAL_DATA", filepath.Join(tmpDir, "crush"))
 
 	expected := filepath.Join(tmpDir, "crush", "projects.json")
 	actual := projectsFilePath()
@@ -124,6 +128,7 @@ func TestProjectsFilePath(t *testing.T) {
 func TestRegisterWithParentDataDir(t *testing.T) {
 	tmpDir := t.TempDir()
 	t.Setenv("XDG_DATA_HOME", tmpDir)
+	t.Setenv("CRUSH_GLOBAL_DATA", filepath.Join(tmpDir, "crush"))
 
 	// Register a project where .crush is in a parent directory.
 	// e.g., working in /home/user/monorepo/packages/app but .crush is at /home/user/monorepo/.crush
@@ -153,6 +158,7 @@ func TestRegisterWithParentDataDir(t *testing.T) {
 func TestRegisterWithExternalDataDir(t *testing.T) {
 	tmpDir := t.TempDir()
 	t.Setenv("XDG_DATA_HOME", tmpDir)
+	t.Setenv("CRUSH_GLOBAL_DATA", filepath.Join(tmpDir, "crush"))
 
 	// Register a project where .crush is in a completely different location.
 	// e.g., project at /home/user/project but data stored at /var/data/crush/myproject

internal/shell/background_test.go 🔗

@@ -14,7 +14,7 @@ func TestBackgroundShellManager_Start(t *testing.T) {
 	t.Skip("Skipping this until I figure out why its flaky")
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -51,7 +51,7 @@ func TestBackgroundShellManager_Start(t *testing.T) {
 func TestBackgroundShellManager_Get(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -77,7 +77,7 @@ func TestBackgroundShellManager_Get(t *testing.T) {
 func TestBackgroundShellManager_Kill(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -119,7 +119,7 @@ func TestBackgroundShellManager_KillNonExistent(t *testing.T) {
 func TestBackgroundShell_IsDone(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -142,7 +142,7 @@ func TestBackgroundShell_IsDone(t *testing.T) {
 func TestBackgroundShell_WithBlockFuncs(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -180,7 +180,7 @@ func TestBackgroundShellManager_List(t *testing.T) {
 
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -224,7 +224,7 @@ func TestBackgroundShellManager_List(t *testing.T) {
 func TestBackgroundShellManager_KillAll(t *testing.T) {
 	t.Parallel()
 
-	ctx := context.Background()
+	ctx := t.Context()
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
@@ -250,7 +250,7 @@ func TestBackgroundShellManager_KillAll(t *testing.T) {
 	}
 
 	// Kill all shells
-	manager.KillAll(context.Background())
+	manager.KillAll(t.Context())
 
 	// Verify all shells are done
 	if !shell1.IsDone() {
@@ -286,19 +286,22 @@ func TestBackgroundShellManager_KillAll(t *testing.T) {
 func TestBackgroundShellManager_KillAll_Timeout(t *testing.T) {
 	t.Parallel()
 
+	// XXX: can't use synctest here - causes --race to trip.
+
 	workingDir := t.TempDir()
 	manager := newBackgroundShellManager()
 
 	// Start a shell that traps signals and ignores cancellation.
-	_, err := manager.Start(context.Background(), workingDir, nil, "trap '' TERM INT; sleep 60", "")
+	_, err := manager.Start(t.Context(), workingDir, nil, "trap '' TERM INT; sleep 60", "")
 	require.NoError(t, err)
 
 	// Short timeout to test the timeout path.
-	ctx, cancel := context.WithTimeout(context.Background(), 100*time.Millisecond)
-	defer cancel()
+	ctx, cancel := context.WithTimeout(t.Context(), 100*time.Millisecond)
+	t.Cleanup(cancel)
 
 	start := time.Now()
 	manager.KillAll(ctx)
+
 	elapsed := time.Since(start)
 
 	// Must return promptly after timeout, not hang for 60 seconds.