backend.go

  1// Package backend provides transport-agnostic operations for managing
  2// workspaces, sessions, agents, permissions, and events. It is consumed
  3// by protocol-specific layers such as HTTP (server) and ACP.
  4package backend
  5
  6import (
  7	"context"
  8	"errors"
  9	"fmt"
 10	"log/slog"
 11	"path/filepath"
 12	"runtime"
 13	"sync"
 14	"time"
 15
 16	"github.com/charmbracelet/crush/internal/app"
 17	"github.com/charmbracelet/crush/internal/config"
 18	"github.com/charmbracelet/crush/internal/csync"
 19	"github.com/charmbracelet/crush/internal/db"
 20	"github.com/charmbracelet/crush/internal/proto"
 21	"github.com/charmbracelet/crush/internal/ui/util"
 22	"github.com/charmbracelet/crush/internal/version"
 23	"github.com/google/uuid"
 24)
 25
// Common errors returned by backend operations.
//
// These are sentinel values created with errors.New. Some are
// returned wrapped (validateClientID wraps ErrInvalidClientID with
// %w), so callers should match them with errors.Is rather than ==.
// Within this file only ErrWorkspaceNotFound, ErrPathRequired,
// ErrInvalidClientID, and ErrClientNotAttached are returned; the
// remaining values are presumably used by other files of the package
// (not visible here).
var (
	ErrWorkspaceNotFound       = errors.New("workspace not found")
	ErrLSPClientNotFound       = errors.New("LSP client not found")
	ErrAgentNotInitialized     = errors.New("agent coordinator not initialized")
	ErrPathRequired            = errors.New("path is required")
	ErrInvalidPermissionAction = errors.New("invalid permission action")
	ErrUnknownCommand          = errors.New("unknown command")
	ErrInvalidClientID         = errors.New("invalid client_id")
	ErrClientNotAttached       = errors.New("client not attached")
)
 37
// DefaultCreateGrace is the window in which a client must open an SSE
// stream after creating a workspace before its creation hold is
// released. Exposed as a package variable so tests can shorten it.
//
// New copies this value into the Backend it constructs, so changing
// it only affects backends created afterwards; use
// [Backend.SetCreateGrace] to adjust an existing backend.
var DefaultCreateGrace = 30 * time.Second
 42
// ShutdownFunc is called when the backend needs to trigger a server
// shutdown (e.g. when the last workspace is removed). It is invoked
// by [Backend.Shutdown] and by teardown once no workspaces remain; a
// nil ShutdownFunc is tolerated by both and simply skipped.
type ShutdownFunc func()
 46
// Backend provides transport-agnostic business logic for the Crush
// server. It manages workspaces and delegates to [app.App] services.
//
// Locking order: when both [Backend.mu] and [Workspace.clientsMu] are
// held at once, [Backend.mu] is acquired first. Detach paths
// ([detachStream], [releaseHoldLocked], [expireHold]) only hold
// [Workspace.clientsMu] briefly, drop it, then call [teardown] which
// takes [Backend.mu] (and then re-takes [Workspace.clientsMu] to
// re-check that the workspace has not been re-claimed). This avoids
// the AB/BA hazard with [CreateWorkspace], which holds [Backend.mu]
// while calling [registerClient] so that a workspace cannot be torn
// down beneath it.
type Backend struct {
	// workspaces maps a workspace ID to its live workspace. In this
	// file entries are added and removed only while mu is held, but
	// plain lookups (e.g. GetWorkspace) read it without taking mu.
	workspaces *csync.Map[string, *Workspace]
	// pathIndex maps a resolved absolute workspace path to its
	// workspace ID. Reads and writes are serialised via mu so
	// concurrent CreateWorkspace calls at the same path deduplicate
	// deterministically.
	pathIndex map[string]string
	mu        sync.Mutex

	// cfg is the server-level configuration returned by Config. ctx
	// is the context handed to db.Connect and app.New when a
	// workspace is created. shutdownFn, when non-nil, requests a
	// server shutdown. createGrace is the creation-hold window used
	// by registerClient; SetCreateGrace writes it under mu.
	cfg         *config.ConfigStore
	ctx         context.Context
	shutdownFn  ShutdownFunc
	createGrace time.Duration
}
 73
// clientState tracks one client's claim on a workspace.
//
//   - streams counts the number of live SSE event streams the client
//     currently has open against the workspace.
//   - holdTimer is non-nil iff the client created the workspace but has
//     not yet attached an SSE stream; it fires after createGrace and
//     releases the hold.
//   - currentSessionID records which session this client is currently
//     viewing. Empty string means the client has no session selected
//     (e.g. the landing screen). Cleared automatically when the
//     clientState entry is removed.
//
// streams and holdTimer are mutually exclusive in practice (the hold
// timer is stopped the moment an SSE stream attaches), but both being
// zero/nil means the entry has been released and should be removed.
//
// Every access to these fields in this file happens while the owning
// workspace's clientsMu is held.
type clientState struct {
	streams          int
	holdTimer        *time.Timer
	currentSessionID string
}
 94
// Workspace represents a running [app.App] workspace with its
// associated resources and state.
type Workspace struct {
	// App is the embedded application instance. invokeShutdown
	// tolerates it being nil (e.g. for workspaces built by tests).
	*app.App
	// ID is the workspace's unique identifier; CreateWorkspace
	// generates it as a UUID string. Path and Env are stored exactly
	// as the creating caller supplied them, and Cfg is the config
	// store initialised for that path.
	ID   string
	Path string
	Cfg  *config.ConfigStore
	Env  []string

	// resolvedPath is the path used as the dedup key in
	// Backend.pathIndex. It is filepath.EvalSymlinks(filepath.Abs(Path))
	// with fallback to the cleaned absolute path.
	resolvedPath string

	// clientsMu guards clients. It is held only briefly (no IO).
	clientsMu sync.Mutex
	// clients tracks each client's claim on this workspace. Refcount
	// is a derived value: len(clients).
	clients map[string]*clientState

	// shutdownFn is the function invoked by [Backend.teardown] to
	// release the workspace's underlying resources. It defaults to the
	// embedded [app.App.Shutdown]; tests may override it to avoid
	// driving a full [app.App] through shutdown.
	shutdownFn func()
}
121
122// invokeShutdown calls the workspace shutdown hook if set, falling
123// back to the embedded [app.App.Shutdown] when not.
124func (w *Workspace) invokeShutdown() {
125	if w.shutdownFn != nil {
126		w.shutdownFn()
127		return
128	}
129	if w.App != nil {
130		w.Shutdown()
131	}
132}
133
134// New creates a new [Backend].
135func New(ctx context.Context, cfg *config.ConfigStore, shutdownFn ShutdownFunc) *Backend {
136	return &Backend{
137		workspaces:  csync.NewMap[string, *Workspace](),
138		pathIndex:   make(map[string]string),
139		cfg:         cfg,
140		ctx:         ctx,
141		shutdownFn:  shutdownFn,
142		createGrace: DefaultCreateGrace,
143	}
144}
145
146// SetCreateGrace overrides the create-grace window. Intended for tests
147// that need short timeouts.
148func (b *Backend) SetCreateGrace(d time.Duration) {
149	b.mu.Lock()
150	defer b.mu.Unlock()
151	b.createGrace = d
152}
153
154// GetWorkspace retrieves a workspace by ID.
155func (b *Backend) GetWorkspace(id string) (*Workspace, error) {
156	ws, ok := b.workspaces.Get(id)
157	if !ok {
158		return nil, ErrWorkspaceNotFound
159	}
160	return ws, nil
161}
162
163// ListWorkspaces returns all running workspaces.
164func (b *Backend) ListWorkspaces() []proto.Workspace {
165	workspaces := []proto.Workspace{}
166	for _, ws := range b.workspaces.Seq2() {
167		workspaces = append(workspaces, workspaceToProto(ws))
168	}
169	return workspaces
170}
171
172// CreateWorkspace initializes a new workspace from the given
173// parameters, or returns an existing workspace if one already exists at
174// the same resolved path (first-wins semantics).
175//
176// args.ClientID must be a valid UUID identifying the calling client;
177// the resulting workspace registers a creation hold on behalf of that
178// client which is released either by the first SSE attach (which
179// converts it into a stream claim) or by the grace window expiring.
180func (b *Backend) CreateWorkspace(args proto.Workspace) (*Workspace, proto.Workspace, error) {
181	if args.Path == "" {
182		return nil, proto.Workspace{}, ErrPathRequired
183	}
184	clientID, err := validateClientID(args.ClientID)
185	if err != nil {
186		return nil, proto.Workspace{}, err
187	}
188
189	key, err := resolveWorkspaceKey(args.Path)
190	if err != nil {
191		return nil, proto.Workspace{}, fmt.Errorf("failed to resolve workspace path: %w", err)
192	}
193
194	b.mu.Lock()
195	if existingID, ok := b.pathIndex[key]; ok {
196		if ws, found := b.workspaces.Get(existingID); found {
197			// Hold b.mu while registering: teardown also
198			// acquires b.mu before tearing the workspace
199			// down, so this guarantees the workspace we
200			// return cannot be torn out from under us
201			// between lookup and registerClient. Lock order
202			// here is b.mu -> ws.clientsMu.
203			logFirstWinsMismatch(ws, args)
204			b.registerClient(ws, clientID)
205			b.mu.Unlock()
206			return ws, workspaceToProto(ws), nil
207		}
208		// pathIndex referenced a workspace that has since been
209		// removed; clean the stale entry and fall through.
210		delete(b.pathIndex, key)
211	}
212	b.mu.Unlock()
213
214	id := uuid.New().String()
215	cfg, err := config.Init(args.Path, args.DataDir, args.Debug)
216	if err != nil {
217		return nil, proto.Workspace{}, fmt.Errorf("failed to initialize config: %w", err)
218	}
219
220	cfg.Overrides().SkipPermissionRequests = args.YOLO
221
222	if err := createDotCrushDir(cfg.Config().Options.DataDirectory); err != nil {
223		return nil, proto.Workspace{}, fmt.Errorf("failed to create data directory: %w", err)
224	}
225
226	conn, err := db.Connect(b.ctx, cfg.Config().Options.DataDirectory)
227	if err != nil {
228		return nil, proto.Workspace{}, fmt.Errorf("failed to connect to database: %w", err)
229	}
230
231	appWorkspace, err := app.New(b.ctx, conn, cfg)
232	if err != nil {
233		return nil, proto.Workspace{}, fmt.Errorf("failed to create app workspace: %w", err)
234	}
235
236	ws := &Workspace{
237		App:          appWorkspace,
238		ID:           id,
239		Path:         args.Path,
240		Cfg:          cfg,
241		Env:          args.Env,
242		resolvedPath: key,
243		clients:      make(map[string]*clientState),
244	}
245
246	b.mu.Lock()
247	// Re-check the index under the lock: a concurrent caller may have
248	// won the race between the initial unlock and here.
249	if existingID, ok := b.pathIndex[key]; ok {
250		if existing, found := b.workspaces.Get(existingID); found {
251			// Register under b.mu so teardown cannot run
252			// between lookup and registerClient. Lock order
253			// is b.mu -> ws.clientsMu.
254			logFirstWinsMismatch(existing, args)
255			b.registerClient(existing, clientID)
256			b.mu.Unlock()
257			ws.invokeShutdown()
258			return existing, workspaceToProto(existing), nil
259		}
260		delete(b.pathIndex, key)
261	}
262	b.workspaces.Set(id, ws)
263	b.pathIndex[key] = id
264	// Register the originating client's hold while still holding
265	// b.mu so the workspace is observable with its claim from the
266	// moment it appears in the index.
267	b.registerClient(ws, clientID)
268	b.mu.Unlock()
269
270	if args.Version != "" && args.Version != version.Version {
271		slog.Warn(
272			"Client/server version mismatch",
273			"client", args.Version,
274			"server", version.Version,
275		)
276		appWorkspace.SendEvent(util.NewWarnMsg(fmt.Sprintf(
277			"Server version %q differs from client version %q. Consider restarting the server.",
278			version.Version, args.Version,
279		)))
280	}
281
282	return ws, workspaceToProto(ws), nil
283}
284
285// AttachClient registers a new SSE stream for the given client on the
286// workspace. The stream's deferred cleanup must call DetachClient with
287// the same arguments to release the claim.
288//
289// The lookup and the clients-map mutation are performed under
290// [Backend.mu] so that AttachClient cannot race with [Backend.teardown]:
291// teardown also holds [Backend.mu] while removing the workspace from
292// b.workspaces, so once AttachClient observes the workspace and takes
293// ws.clientsMu (under b.mu), no concurrent teardown can succeed without
294// re-checking the (now non-empty) clients map. Lock order is the
295// canonical b.mu -> ws.clientsMu.
296func (b *Backend) AttachClient(workspaceID, clientID string) error {
297	if _, err := validateClientID(clientID); err != nil {
298		return err
299	}
300
301	b.mu.Lock()
302	defer b.mu.Unlock()
303	ws, ok := b.workspaces.Get(workspaceID)
304	if !ok {
305		return ErrWorkspaceNotFound
306	}
307
308	ws.clientsMu.Lock()
309	defer ws.clientsMu.Unlock()
310	cs, ok := ws.clients[clientID]
311	if !ok {
312		// Defensive: SSE attach without a prior CreateWorkspace by
313		// this client still installs a stream claim so the stream
314		// stays alive for its duration.
315		ws.clients[clientID] = &clientState{streams: 1}
316		return nil
317	}
318	if cs.holdTimer != nil {
319		cs.holdTimer.Stop()
320		cs.holdTimer = nil
321	}
322	cs.streams++
323	return nil
324}
325
326// DetachClient releases one SSE stream's hold on the workspace. If the
327// client has no other streams and no pending creation hold, its claim
328// is removed and the workspace is torn down once refcount hits zero.
329func (b *Backend) DetachClient(workspaceID, clientID string) {
330	ws, ok := b.workspaces.Get(workspaceID)
331	if !ok {
332		return
333	}
334	b.detachStream(ws, clientID)
335}
336
337// releaseHold releases the creation hold for a client, if any. Active
338// stream claims are unaffected. Idempotent: returns nil if the
339// workspace or the client's hold no longer exist.
340func (b *Backend) releaseHold(workspaceID, clientID string) error {
341	if _, err := validateClientID(clientID); err != nil {
342		return err
343	}
344	ws, ok := b.workspaces.Get(workspaceID)
345	if !ok {
346		return nil
347	}
348	b.releaseHoldLocked(ws, clientID)
349	return nil
350}
351
352// registerClient installs (idempotently) the given client's claim on
353// the workspace and starts a grace timer if the entry is fresh.
354func (b *Backend) registerClient(ws *Workspace, clientID string) {
355	ws.clientsMu.Lock()
356	defer ws.clientsMu.Unlock()
357	if _, ok := ws.clients[clientID]; ok {
358		// Idempotent: a duplicate CreateWorkspace from the same
359		// client does not add a second claim.
360		return
361	}
362	cs := &clientState{}
363	cs.holdTimer = time.AfterFunc(b.createGrace, func() {
364		b.expireHold(ws, clientID, cs)
365	})
366	ws.clients[clientID] = cs
367}
368
369// expireHold is the body of the grace timer. It runs in its own
370// goroutine and races against AttachClient/releaseHold; the timer
371// stays valid only while the entry's holdTimer still points at it.
372func (b *Backend) expireHold(ws *Workspace, clientID string, timer *clientState) {
373	ws.clientsMu.Lock()
374	cs, ok := ws.clients[clientID]
375	if !ok || cs != timer || cs.holdTimer == nil || cs.streams > 0 {
376		ws.clientsMu.Unlock()
377		return
378	}
379	cs.holdTimer = nil
380	delete(ws.clients, clientID)
381	teardown := len(ws.clients) == 0
382	ws.clientsMu.Unlock()
383	if teardown {
384		b.teardown(ws)
385	}
386}
387
388func (b *Backend) releaseHoldLocked(ws *Workspace, clientID string) {
389	ws.clientsMu.Lock()
390	cs, ok := ws.clients[clientID]
391	if !ok {
392		ws.clientsMu.Unlock()
393		return
394	}
395	if cs.holdTimer != nil {
396		cs.holdTimer.Stop()
397		cs.holdTimer = nil
398	}
399	teardown := false
400	if cs.streams == 0 {
401		delete(ws.clients, clientID)
402		teardown = len(ws.clients) == 0
403	}
404	ws.clientsMu.Unlock()
405	if teardown {
406		b.teardown(ws)
407	}
408}
409
410func (b *Backend) detachStream(ws *Workspace, clientID string) {
411	ws.clientsMu.Lock()
412	cs, ok := ws.clients[clientID]
413	if !ok {
414		ws.clientsMu.Unlock()
415		return
416	}
417	if cs.streams > 0 {
418		cs.streams--
419	}
420	teardown := false
421	if cs.streams == 0 && cs.holdTimer == nil {
422		delete(ws.clients, clientID)
423		teardown = len(ws.clients) == 0
424	}
425	ws.clientsMu.Unlock()
426	if teardown {
427		b.teardown(ws)
428	}
429}
430
431// teardown removes the workspace from the index, shuts down its
432// underlying [app.App], and triggers a server shutdown if it was the
433// last workspace alive.
434//
435// Callers reach teardown after observing len(ws.clients) == 0 while
436// holding ws.clientsMu and then releasing it. Between that release
437// and the b.mu.Lock below, a concurrent CreateWorkspace may have
438// re-registered a client (CreateWorkspace holds b.mu while doing so,
439// so it is mutually exclusive with this critical section). teardown
440// re-checks under both locks (in the canonical b.mu -> ws.clientsMu
441// order) and aborts if the workspace has been re-claimed.
442func (b *Backend) teardown(ws *Workspace) {
443	b.mu.Lock()
444	ws.clientsMu.Lock()
445	if len(ws.clients) > 0 {
446		// Race: a CreateWorkspace re-registered a client
447		// between the detach path dropping ws.clientsMu and us
448		// taking b.mu. Abort: the workspace is still alive.
449		ws.clientsMu.Unlock()
450		b.mu.Unlock()
451		return
452	}
453	ws.clientsMu.Unlock()
454	if existing, ok := b.pathIndex[ws.resolvedPath]; ok && existing == ws.ID {
455		delete(b.pathIndex, ws.resolvedPath)
456	}
457	b.workspaces.Del(ws.ID)
458	remaining := b.workspaces.Len()
459	b.mu.Unlock()
460
461	ws.invokeShutdown()
462
463	if remaining == 0 && b.shutdownFn != nil {
464		slog.Info("Last workspace removed, shutting down server...")
465		b.shutdownFn()
466	}
467}
468
469// DeleteWorkspace is the public entry point used by the HTTP DELETE
470// handler. It releases the named client's creation hold; live streams
471// from the same client remain attached and continue holding the
472// workspace open until their own deferred DetachClient runs.
473func (b *Backend) DeleteWorkspace(id, clientID string) error {
474	return b.releaseHold(id, clientID)
475}
476
477// SetCurrentSession records which session the given client is
478// currently viewing within the workspace. Passing an empty sessionID
479// clears the client's current-session entry (e.g. the client has
480// returned to the landing screen).
481//
482// The client must be actually attached — i.e. its [clientState] entry
483// must exist and have at least one live stream. A bare creation hold
484// (streams == 0) is rejected with [ErrClientNotAttached]. This
485// guards against zombie writes from a client that has detached and
486// against ghost presence from a hold-only client that never opened an
487// SSE stream.
488func (b *Backend) SetCurrentSession(workspaceID, clientID, sessionID string) error {
489	if _, err := validateClientID(clientID); err != nil {
490		return err
491	}
492	ws, ok := b.workspaces.Get(workspaceID)
493	if !ok {
494		return ErrWorkspaceNotFound
495	}
496	ws.clientsMu.Lock()
497	defer ws.clientsMu.Unlock()
498	cs, ok := ws.clients[clientID]
499	if !ok || cs.streams == 0 {
500		// No entry, or hold-only (no live stream): refuse the
501		// write. The presence record this is meant to feed
502		// should only reflect clients that can actually observe
503		// session events.
504		return ErrClientNotAttached
505	}
506	cs.currentSessionID = sessionID
507	return nil
508}
509
510// GetWorkspaceProto returns the proto representation of a workspace.
511func (b *Backend) GetWorkspaceProto(id string) (proto.Workspace, error) {
512	ws, err := b.GetWorkspace(id)
513	if err != nil {
514		return proto.Workspace{}, err
515	}
516	return workspaceToProto(ws), nil
517}
518
519// VersionInfo returns server version information.
520func (b *Backend) VersionInfo() proto.VersionInfo {
521	return proto.VersionInfo{
522		Version:   version.Version,
523		Commit:    version.Commit,
524		BuildID:   version.BuildID,
525		GoVersion: runtime.Version(),
526		Platform:  fmt.Sprintf("%s/%s", runtime.GOOS, runtime.GOARCH),
527	}
528}
529
530// Config returns the server-level configuration.
531func (b *Backend) Config() *config.ConfigStore {
532	return b.cfg
533}
534
535// Shutdown initiates a graceful server shutdown.
536func (b *Backend) Shutdown() {
537	if b.shutdownFn != nil {
538		b.shutdownFn()
539	}
540}
541
// resolveWorkspaceKey canonicalises path for use as a dedup key. The
// path is first made absolute with filepath.Abs (whose error, if
// any, is returned). Symlinks are then resolved with
// filepath.EvalSymlinks; if that fails — as it does for paths that do
// not exist — the cleaned absolute path is used instead.
func resolveWorkspaceKey(path string) (string, error) {
	absolute, err := filepath.Abs(path)
	if err != nil {
		return "", err
	}
	canonical, evalErr := filepath.EvalSymlinks(absolute)
	if evalErr != nil {
		// Best effort: fall back to the absolute form.
		return absolute, nil
	}
	return canonical, nil
}
556
557// validateClientID returns the trimmed UUID string or an error if the
558// input is empty or not a valid UUID.
559func validateClientID(id string) (string, error) {
560	if id == "" {
561		return "", ErrInvalidClientID
562	}
563	if _, err := uuid.Parse(id); err != nil {
564		return "", fmt.Errorf("%w: %v", ErrInvalidClientID, err)
565	}
566	return id, nil
567}
568
569func workspaceToProto(ws *Workspace) proto.Workspace {
570	cfg := ws.Cfg.Config()
571	return proto.Workspace{
572		ID:      ws.ID,
573		Path:    ws.Path,
574		YOLO:    ws.Cfg.Overrides().SkipPermissionRequests,
575		DataDir: cfg.Options.DataDirectory,
576		Debug:   cfg.Options.Debug,
577		Config:  cfg,
578		Env:     ws.Env,
579		Version: version.Version,
580	}
581}
582
583// logFirstWinsMismatch emits a debug line whenever a second
584// CreateWorkspace at the same resolved path arrives with flags that
585// differ from the originating workspace. The existing workspace wins;
586// the incoming flags are silently ignored.
587//
588// The comparison is done against the incoming args as the caller sent
589// them — including empty/zero values — rather than after defaulting.
590// This means that, for example, a second caller who omits DataDir
591// while the first set one will still log the mismatch.
592func logFirstWinsMismatch(existing *Workspace, args proto.Workspace) {
593	existingCfg := existing.Cfg.Config()
594	existingYOLO := existing.Cfg.Overrides().SkipPermissionRequests
595	if existingYOLO == args.YOLO &&
596		existingCfg.Options.Debug == args.Debug &&
597		existingCfg.Options.DataDirectory == args.DataDir &&
598		stringSlicesEqual(existing.Env, args.Env) {
599		return
600	}
601	slog.Debug(
602		"Workspace flag mismatch on duplicate create; first wins",
603		"workspace_id", existing.ID,
604		"path", existing.Path,
605		"existing_yolo", existingYOLO,
606		"requested_yolo", args.YOLO,
607		"existing_debug", existingCfg.Options.Debug,
608		"requested_debug", args.Debug,
609		"existing_data_dir", existingCfg.Options.DataDirectory,
610		"requested_data_dir", args.DataDir,
611		"existing_env", existing.Env,
612		"requested_env", args.Env,
613	)
614}
615
// stringSlicesEqual reports whether a and b hold the same strings in
// the same order. Only length and contents are compared, so a nil
// slice and an empty slice are considered equal.
func stringSlicesEqual(a, b []string) bool {
	if len(a) != len(b) {
		return false
	}
	for idx, want := range a {
		if b[idx] != want {
			return false
		}
	}
	return true
}