backend.go

  1// Package backend provides transport-agnostic operations for managing
  2// workspaces, sessions, agents, permissions, and events. It is consumed
  3// by protocol-specific layers such as HTTP (server) and ACP.
  4package backend
  5
  6import (
  7	"context"
  8	"errors"
  9	"fmt"
 10	"log/slog"
 11	"path/filepath"
 12	"runtime"
 13	"sync"
 14	"time"
 15
 16	"github.com/charmbracelet/crush/internal/app"
 17	"github.com/charmbracelet/crush/internal/config"
 18	"github.com/charmbracelet/crush/internal/csync"
 19	"github.com/charmbracelet/crush/internal/db"
 20	"github.com/charmbracelet/crush/internal/proto"
 21	"github.com/charmbracelet/crush/internal/ui/util"
 22	"github.com/charmbracelet/crush/internal/version"
 23	"github.com/google/uuid"
 24)
 25
// Common errors returned by backend operations. They are sentinel
// values; ErrInvalidClientID in particular may be returned wrapped, so
// callers should match with errors.Is rather than ==.
//
//   - ErrWorkspaceNotFound: no workspace is registered under the
//     requested ID (see GetWorkspace and AttachClient).
//   - ErrPathRequired: CreateWorkspace was called with an empty path.
//   - ErrInvalidClientID: the client ID was empty or could not be
//     parsed as a UUID.
//   - ErrLSPClientNotFound, ErrAgentNotInitialized,
//     ErrInvalidPermissionAction, ErrUnknownCommand: not referenced in
//     this part of the file; presumably returned by operations defined
//     elsewhere in the package.
var (
	ErrWorkspaceNotFound       = errors.New("workspace not found")
	ErrLSPClientNotFound       = errors.New("LSP client not found")
	ErrAgentNotInitialized     = errors.New("agent coordinator not initialized")
	ErrPathRequired            = errors.New("path is required")
	ErrInvalidPermissionAction = errors.New("invalid permission action")
	ErrUnknownCommand          = errors.New("unknown command")
	ErrInvalidClientID         = errors.New("invalid client_id")
)
 36
// DefaultCreateGrace is the window in which a client must open an SSE
// stream after creating a workspace before its creation hold is
// released. Exposed as a package variable so tests can shorten it.
//
// New copies the value into each Backend at construction time, so
// changing it afterwards only affects backends created later; use
// Backend.SetCreateGrace to adjust an existing backend.
var DefaultCreateGrace = 30 * time.Second
 41
// ShutdownFunc is called when the backend needs to trigger a server
// shutdown (e.g. when the last workspace is removed). It is also what
// Backend.Shutdown invokes. A nil ShutdownFunc is permitted: every call
// site in this file checks for nil before invoking it.
type ShutdownFunc func()
 45
// Backend provides transport-agnostic business logic for the Crush
// server. It manages workspaces and delegates to [app.App] services.
//
// Locking order: when both [Backend.mu] and [Workspace.clientsMu] are
// held at once, [Backend.mu] is acquired first. Detach paths
// ([detachStream], [releaseHoldLocked], [expireHold]) only hold
// [Workspace.clientsMu] briefly, drop it, then call [teardown] which
// takes [Backend.mu] (and then re-takes [Workspace.clientsMu] to
// re-check that the workspace has not been re-claimed). This avoids
// the AB/BA hazard with [CreateWorkspace], which holds [Backend.mu]
// while calling [registerClient] so that a workspace cannot be torn
// down beneath it.
//
// Fields:
//   - workspaces maps workspace ID to workspace. Insertions and
//     removals in this file happen with mu held; plain lookups
//     (GetWorkspace, DetachClient, releaseHold) do not take mu.
//   - mu serialises pathIndex access, workspace registration/removal,
//     and writes to createGrace.
//   - cfg is the server-level configuration returned by Config.
//   - ctx is the context passed to db.Connect and app.New for every
//     workspace this backend creates.
//   - shutdownFn, when non-nil, is invoked by Shutdown and by teardown
//     once the last workspace has been removed.
//   - createGrace is the creation-hold duration used by registerClient.
type Backend struct {
	workspaces *csync.Map[string, *Workspace]
	// pathIndex maps a resolved absolute workspace path to its
	// workspace ID. Reads and writes are serialised via mu so
	// concurrent CreateWorkspace calls at the same path deduplicate
	// deterministically.
	pathIndex map[string]string
	mu        sync.Mutex

	cfg         *config.ConfigStore
	ctx         context.Context
	shutdownFn  ShutdownFunc
	createGrace time.Duration
}
 72
// clientState tracks one client's claim on a workspace.
//
//   - streams counts the number of live SSE event streams the client
//     currently has open against the workspace.
//   - holdTimer is non-nil iff the client created the workspace but has
//     not yet attached an SSE stream; it fires after createGrace and
//     releases the hold.
//
// The two are mutually exclusive in practice (the hold timer is stopped
// the moment an SSE stream attaches), but both being zero/nil means the
// entry has been released and should be removed.
//
// Both fields are read and written only while the owning workspace's
// clientsMu is held.
type clientState struct {
	streams   int
	holdTimer *time.Timer
}
 88
// Workspace represents a running [app.App] workspace with its
// associated resources and state.
//
// As populated by [Backend.CreateWorkspace]: ID is a freshly generated
// UUID string, Path and Env are copied verbatim from the create
// request, and Cfg is the configuration store returned by config.Init
// for that request.
type Workspace struct {
	*app.App
	ID   string
	Path string
	Cfg  *config.ConfigStore
	Env  []string

	// resolvedPath is the path used as the dedup key in
	// Backend.pathIndex. It is filepath.EvalSymlinks(filepath.Abs(Path))
	// with fallback to the cleaned absolute path.
	resolvedPath string

	// clientsMu guards clients. It is held only briefly (no IO).
	clientsMu sync.Mutex
	// clients tracks each client's claim on this workspace. Refcount
	// is a derived value: len(clients). The key is the client ID.
	clients map[string]*clientState

	// shutdownFn is the function invoked by [Backend.teardown] to
	// release the workspace's underlying resources. It defaults to the
	// embedded [app.App.Shutdown]; tests may override it to avoid
	// driving a full [app.App] through shutdown.
	shutdownFn func()
}
115
116// invokeShutdown calls the workspace shutdown hook if set, falling
117// back to the embedded [app.App.Shutdown] when not.
118func (w *Workspace) invokeShutdown() {
119	if w.shutdownFn != nil {
120		w.shutdownFn()
121		return
122	}
123	if w.App != nil {
124		w.Shutdown()
125	}
126}
127
128// New creates a new [Backend].
129func New(ctx context.Context, cfg *config.ConfigStore, shutdownFn ShutdownFunc) *Backend {
130	return &Backend{
131		workspaces:  csync.NewMap[string, *Workspace](),
132		pathIndex:   make(map[string]string),
133		cfg:         cfg,
134		ctx:         ctx,
135		shutdownFn:  shutdownFn,
136		createGrace: DefaultCreateGrace,
137	}
138}
139
140// SetCreateGrace overrides the create-grace window. Intended for tests
141// that need short timeouts.
142func (b *Backend) SetCreateGrace(d time.Duration) {
143	b.mu.Lock()
144	defer b.mu.Unlock()
145	b.createGrace = d
146}
147
148// GetWorkspace retrieves a workspace by ID.
149func (b *Backend) GetWorkspace(id string) (*Workspace, error) {
150	ws, ok := b.workspaces.Get(id)
151	if !ok {
152		return nil, ErrWorkspaceNotFound
153	}
154	return ws, nil
155}
156
157// ListWorkspaces returns all running workspaces.
158func (b *Backend) ListWorkspaces() []proto.Workspace {
159	workspaces := []proto.Workspace{}
160	for _, ws := range b.workspaces.Seq2() {
161		workspaces = append(workspaces, workspaceToProto(ws))
162	}
163	return workspaces
164}
165
166// CreateWorkspace initializes a new workspace from the given
167// parameters, or returns an existing workspace if one already exists at
168// the same resolved path (first-wins semantics).
169//
170// args.ClientID must be a valid UUID identifying the calling client;
171// the resulting workspace registers a creation hold on behalf of that
172// client which is released either by the first SSE attach (which
173// converts it into a stream claim) or by the grace window expiring.
174func (b *Backend) CreateWorkspace(args proto.Workspace) (*Workspace, proto.Workspace, error) {
175	if args.Path == "" {
176		return nil, proto.Workspace{}, ErrPathRequired
177	}
178	clientID, err := validateClientID(args.ClientID)
179	if err != nil {
180		return nil, proto.Workspace{}, err
181	}
182
183	key, err := resolveWorkspaceKey(args.Path)
184	if err != nil {
185		return nil, proto.Workspace{}, fmt.Errorf("failed to resolve workspace path: %w", err)
186	}
187
188	b.mu.Lock()
189	if existingID, ok := b.pathIndex[key]; ok {
190		if ws, found := b.workspaces.Get(existingID); found {
191			// Hold b.mu while registering: teardown also
192			// acquires b.mu before tearing the workspace
193			// down, so this guarantees the workspace we
194			// return cannot be torn out from under us
195			// between lookup and registerClient. Lock order
196			// here is b.mu -> ws.clientsMu.
197			logFirstWinsMismatch(ws, args)
198			b.registerClient(ws, clientID)
199			b.mu.Unlock()
200			return ws, workspaceToProto(ws), nil
201		}
202		// pathIndex referenced a workspace that has since been
203		// removed; clean the stale entry and fall through.
204		delete(b.pathIndex, key)
205	}
206	b.mu.Unlock()
207
208	id := uuid.New().String()
209	cfg, err := config.Init(args.Path, args.DataDir, args.Debug)
210	if err != nil {
211		return nil, proto.Workspace{}, fmt.Errorf("failed to initialize config: %w", err)
212	}
213
214	cfg.Overrides().SkipPermissionRequests = args.YOLO
215
216	if err := createDotCrushDir(cfg.Config().Options.DataDirectory); err != nil {
217		return nil, proto.Workspace{}, fmt.Errorf("failed to create data directory: %w", err)
218	}
219
220	conn, err := db.Connect(b.ctx, cfg.Config().Options.DataDirectory)
221	if err != nil {
222		return nil, proto.Workspace{}, fmt.Errorf("failed to connect to database: %w", err)
223	}
224
225	appWorkspace, err := app.New(b.ctx, conn, cfg)
226	if err != nil {
227		return nil, proto.Workspace{}, fmt.Errorf("failed to create app workspace: %w", err)
228	}
229
230	ws := &Workspace{
231		App:          appWorkspace,
232		ID:           id,
233		Path:         args.Path,
234		Cfg:          cfg,
235		Env:          args.Env,
236		resolvedPath: key,
237		clients:      make(map[string]*clientState),
238	}
239
240	b.mu.Lock()
241	// Re-check the index under the lock: a concurrent caller may have
242	// won the race between the initial unlock and here.
243	if existingID, ok := b.pathIndex[key]; ok {
244		if existing, found := b.workspaces.Get(existingID); found {
245			// Register under b.mu so teardown cannot run
246			// between lookup and registerClient. Lock order
247			// is b.mu -> ws.clientsMu.
248			logFirstWinsMismatch(existing, args)
249			b.registerClient(existing, clientID)
250			b.mu.Unlock()
251			ws.invokeShutdown()
252			return existing, workspaceToProto(existing), nil
253		}
254		delete(b.pathIndex, key)
255	}
256	b.workspaces.Set(id, ws)
257	b.pathIndex[key] = id
258	// Register the originating client's hold while still holding
259	// b.mu so the workspace is observable with its claim from the
260	// moment it appears in the index.
261	b.registerClient(ws, clientID)
262	b.mu.Unlock()
263
264	if args.Version != "" && args.Version != version.Version {
265		slog.Warn(
266			"Client/server version mismatch",
267			"client", args.Version,
268			"server", version.Version,
269		)
270		appWorkspace.SendEvent(util.NewWarnMsg(fmt.Sprintf(
271			"Server version %q differs from client version %q. Consider restarting the server.",
272			version.Version, args.Version,
273		)))
274	}
275
276	return ws, workspaceToProto(ws), nil
277}
278
279// AttachClient registers a new SSE stream for the given client on the
280// workspace. The stream's deferred cleanup must call DetachClient with
281// the same arguments to release the claim.
282//
283// The lookup and the clients-map mutation are performed under
284// [Backend.mu] so that AttachClient cannot race with [Backend.teardown]:
285// teardown also holds [Backend.mu] while removing the workspace from
286// b.workspaces, so once AttachClient observes the workspace and takes
287// ws.clientsMu (under b.mu), no concurrent teardown can succeed without
288// re-checking the (now non-empty) clients map. Lock order is the
289// canonical b.mu -> ws.clientsMu.
290func (b *Backend) AttachClient(workspaceID, clientID string) error {
291	if _, err := validateClientID(clientID); err != nil {
292		return err
293	}
294
295	b.mu.Lock()
296	defer b.mu.Unlock()
297	ws, ok := b.workspaces.Get(workspaceID)
298	if !ok {
299		return ErrWorkspaceNotFound
300	}
301
302	ws.clientsMu.Lock()
303	defer ws.clientsMu.Unlock()
304	cs, ok := ws.clients[clientID]
305	if !ok {
306		// Defensive: SSE attach without a prior CreateWorkspace by
307		// this client still installs a stream claim so the stream
308		// stays alive for its duration.
309		ws.clients[clientID] = &clientState{streams: 1}
310		return nil
311	}
312	if cs.holdTimer != nil {
313		cs.holdTimer.Stop()
314		cs.holdTimer = nil
315	}
316	cs.streams++
317	return nil
318}
319
320// DetachClient releases one SSE stream's hold on the workspace. If the
321// client has no other streams and no pending creation hold, its claim
322// is removed and the workspace is torn down once refcount hits zero.
323func (b *Backend) DetachClient(workspaceID, clientID string) {
324	ws, ok := b.workspaces.Get(workspaceID)
325	if !ok {
326		return
327	}
328	b.detachStream(ws, clientID)
329}
330
331// releaseHold releases the creation hold for a client, if any. Active
332// stream claims are unaffected. Idempotent: returns nil if the
333// workspace or the client's hold no longer exist.
334func (b *Backend) releaseHold(workspaceID, clientID string) error {
335	if _, err := validateClientID(clientID); err != nil {
336		return err
337	}
338	ws, ok := b.workspaces.Get(workspaceID)
339	if !ok {
340		return nil
341	}
342	b.releaseHoldLocked(ws, clientID)
343	return nil
344}
345
346// registerClient installs (idempotently) the given client's claim on
347// the workspace and starts a grace timer if the entry is fresh.
348func (b *Backend) registerClient(ws *Workspace, clientID string) {
349	ws.clientsMu.Lock()
350	defer ws.clientsMu.Unlock()
351	if _, ok := ws.clients[clientID]; ok {
352		// Idempotent: a duplicate CreateWorkspace from the same
353		// client does not add a second claim.
354		return
355	}
356	cs := &clientState{}
357	cs.holdTimer = time.AfterFunc(b.createGrace, func() {
358		b.expireHold(ws, clientID, cs)
359	})
360	ws.clients[clientID] = cs
361}
362
363// expireHold is the body of the grace timer. It runs in its own
364// goroutine and races against AttachClient/releaseHold; the timer
365// stays valid only while the entry's holdTimer still points at it.
366func (b *Backend) expireHold(ws *Workspace, clientID string, timer *clientState) {
367	ws.clientsMu.Lock()
368	cs, ok := ws.clients[clientID]
369	if !ok || cs != timer || cs.holdTimer == nil || cs.streams > 0 {
370		ws.clientsMu.Unlock()
371		return
372	}
373	cs.holdTimer = nil
374	delete(ws.clients, clientID)
375	teardown := len(ws.clients) == 0
376	ws.clientsMu.Unlock()
377	if teardown {
378		b.teardown(ws)
379	}
380}
381
382func (b *Backend) releaseHoldLocked(ws *Workspace, clientID string) {
383	ws.clientsMu.Lock()
384	cs, ok := ws.clients[clientID]
385	if !ok {
386		ws.clientsMu.Unlock()
387		return
388	}
389	if cs.holdTimer != nil {
390		cs.holdTimer.Stop()
391		cs.holdTimer = nil
392	}
393	teardown := false
394	if cs.streams == 0 {
395		delete(ws.clients, clientID)
396		teardown = len(ws.clients) == 0
397	}
398	ws.clientsMu.Unlock()
399	if teardown {
400		b.teardown(ws)
401	}
402}
403
404func (b *Backend) detachStream(ws *Workspace, clientID string) {
405	ws.clientsMu.Lock()
406	cs, ok := ws.clients[clientID]
407	if !ok {
408		ws.clientsMu.Unlock()
409		return
410	}
411	if cs.streams > 0 {
412		cs.streams--
413	}
414	teardown := false
415	if cs.streams == 0 && cs.holdTimer == nil {
416		delete(ws.clients, clientID)
417		teardown = len(ws.clients) == 0
418	}
419	ws.clientsMu.Unlock()
420	if teardown {
421		b.teardown(ws)
422	}
423}
424
425// teardown removes the workspace from the index, shuts down its
426// underlying [app.App], and triggers a server shutdown if it was the
427// last workspace alive.
428//
429// Callers reach teardown after observing len(ws.clients) == 0 while
430// holding ws.clientsMu and then releasing it. Between that release
431// and the b.mu.Lock below, a concurrent CreateWorkspace may have
432// re-registered a client (CreateWorkspace holds b.mu while doing so,
433// so it is mutually exclusive with this critical section). teardown
434// re-checks under both locks (in the canonical b.mu -> ws.clientsMu
435// order) and aborts if the workspace has been re-claimed.
436func (b *Backend) teardown(ws *Workspace) {
437	b.mu.Lock()
438	ws.clientsMu.Lock()
439	if len(ws.clients) > 0 {
440		// Race: a CreateWorkspace re-registered a client
441		// between the detach path dropping ws.clientsMu and us
442		// taking b.mu. Abort: the workspace is still alive.
443		ws.clientsMu.Unlock()
444		b.mu.Unlock()
445		return
446	}
447	ws.clientsMu.Unlock()
448	if existing, ok := b.pathIndex[ws.resolvedPath]; ok && existing == ws.ID {
449		delete(b.pathIndex, ws.resolvedPath)
450	}
451	b.workspaces.Del(ws.ID)
452	remaining := b.workspaces.Len()
453	b.mu.Unlock()
454
455	ws.invokeShutdown()
456
457	if remaining == 0 && b.shutdownFn != nil {
458		slog.Info("Last workspace removed, shutting down server...")
459		b.shutdownFn()
460	}
461}
462
463// DeleteWorkspace is the public entry point used by the HTTP DELETE
464// handler. It releases the named client's creation hold; live streams
465// from the same client remain attached and continue holding the
466// workspace open until their own deferred DetachClient runs.
467func (b *Backend) DeleteWorkspace(id, clientID string) error {
468	return b.releaseHold(id, clientID)
469}
470
471// GetWorkspaceProto returns the proto representation of a workspace.
472func (b *Backend) GetWorkspaceProto(id string) (proto.Workspace, error) {
473	ws, err := b.GetWorkspace(id)
474	if err != nil {
475		return proto.Workspace{}, err
476	}
477	return workspaceToProto(ws), nil
478}
479
480// VersionInfo returns server version information.
481func (b *Backend) VersionInfo() proto.VersionInfo {
482	return proto.VersionInfo{
483		Version:   version.Version,
484		Commit:    version.Commit,
485		BuildID:   version.BuildID,
486		GoVersion: runtime.Version(),
487		Platform:  fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
488	}
489}
490
491// Config returns the server-level configuration.
492func (b *Backend) Config() *config.ConfigStore {
493	return b.cfg
494}
495
496// Shutdown initiates a graceful server shutdown.
497func (b *Backend) Shutdown() {
498	if b.shutdownFn != nil {
499		b.shutdownFn()
500	}
501}
502
// resolveWorkspaceKey canonicalises path for use as a dedup key. The
// path is first made absolute with filepath.Abs (an error there is
// returned to the caller) and then passed through
// filepath.EvalSymlinks. EvalSymlinks fails for paths that do not
// exist, so any error from it is swallowed and the cleaned absolute
// path is used as the key instead.
func resolveWorkspaceKey(path string) (string, error) {
	abs, err := filepath.Abs(path)
	if err != nil {
		return "", err
	}
	resolved, symErr := filepath.EvalSymlinks(abs)
	if symErr != nil {
		return abs, nil
	}
	return resolved, nil
}
517
518// validateClientID returns the trimmed UUID string or an error if the
519// input is empty or not a valid UUID.
520func validateClientID(id string) (string, error) {
521	if id == "" {
522		return "", ErrInvalidClientID
523	}
524	if _, err := uuid.Parse(id); err != nil {
525		return "", fmt.Errorf("%w: %v", ErrInvalidClientID, err)
526	}
527	return id, nil
528}
529
530func workspaceToProto(ws *Workspace) proto.Workspace {
531	cfg := ws.Cfg.Config()
532	return proto.Workspace{
533		ID:      ws.ID,
534		Path:    ws.Path,
535		YOLO:    ws.Cfg.Overrides().SkipPermissionRequests,
536		DataDir: cfg.Options.DataDirectory,
537		Debug:   cfg.Options.Debug,
538		Config:  cfg,
539		Env:     ws.Env,
540		Version: version.Version,
541	}
542}
543
544// logFirstWinsMismatch emits a debug line whenever a second
545// CreateWorkspace at the same resolved path arrives with flags that
546// differ from the originating workspace. The existing workspace wins;
547// the incoming flags are silently ignored.
548//
549// The comparison is done against the incoming args as the caller sent
550// them — including empty/zero values — rather than after defaulting.
551// This means that, for example, a second caller who omits DataDir
552// while the first set one will still log the mismatch.
553func logFirstWinsMismatch(existing *Workspace, args proto.Workspace) {
554	existingCfg := existing.Cfg.Config()
555	existingYOLO := existing.Cfg.Overrides().SkipPermissionRequests
556	if existingYOLO == args.YOLO &&
557		existingCfg.Options.Debug == args.Debug &&
558		existingCfg.Options.DataDirectory == args.DataDir &&
559		stringSlicesEqual(existing.Env, args.Env) {
560		return
561	}
562	slog.Debug(
563		"Workspace flag mismatch on duplicate create; first wins",
564		"workspace_id", existing.ID,
565		"path", existing.Path,
566		"existing_yolo", existingYOLO,
567		"requested_yolo", args.YOLO,
568		"existing_debug", existingCfg.Options.Debug,
569		"requested_debug", args.Debug,
570		"existing_data_dir", existingCfg.Options.DataDirectory,
571		"requested_data_dir", args.DataDir,
572		"existing_env", existing.Env,
573		"requested_env", args.Env,
574	)
575}
576
// stringSlicesEqual reports whether a and b hold the same strings in
// the same order. Only length and contents are compared, so a nil
// slice and an empty slice are considered equal.
func stringSlicesEqual(a, b []string) bool {
	n := len(a)
	if n != len(b) {
		return false
	}
	i := 0
	for i < n && a[i] == b[i] {
		i++
	}
	return i == n
}