// datadirlock.go

package db
  2
import (
	"encoding/json"
	"errors"
	"fmt"
	"log/slog"
	"os"
	"path/filepath"
	"strconv"
	"time"

	"github.com/charmbracelet/crush/internal/lock"
	"github.com/charmbracelet/crush/internal/version"
)
 16
 17// ErrDataDirLocked is returned by Connect when the data directory is
 18// already in use by another crush process.
 19var ErrDataDirLocked = errors.New("data directory already in use by another crush process")
 20
 21// dataDirLockFile is the name of the lock file inside the data
 22// directory. It lives next to crush.db so users can `ls` and find it.
 23const dataDirLockFile = "crush.lock"
 24
 25// dataDirOwnerInfo is the JSON payload written into the lock file by
 26// the process that currently owns it. It is purely informational; the
 27// authoritative state of ownership is the operating system flock on
 28// the file descriptor.
 29type dataDirOwnerInfo struct {
 30	PID       int    `json:"pid"`
 31	Version   string `json:"version,omitempty"`
 32	StartedAt string `json:"started_at,omitempty"`
 33}
 34
 35// dataDirLock represents an acquired exclusive lock on a data
 36// directory. release closes the underlying file descriptor which the
 37// kernel uses to drop the OS-level lock.
 38type dataDirLock struct {
 39	release func()
 40}
 41
 42// acquireDataDirLock takes an exclusive non-blocking lock on
 43// {dataDir}/crush.lock. If the lock is already held by another
 44// process, it returns ErrDataDirLocked wrapped with a diagnostic that
 45// includes whatever owner info that process wrote.
 46//
 47// Acquisition is skipped (returning a no-op lock) when
 48// CRUSH_SKIP_DATADIR_LOCK is set to a truthy value. This is intended
 49// as an escape hatch for hostile filesystems that do not implement
 50// advisory locking; it should not be used in normal operation.
 51func acquireDataDirLock(dataDir string) (*dataDirLock, error) {
 52	if skipDataDirLock() {
 53		return &dataDirLock{release: func() {}}, nil
 54	}
 55
 56	path := filepath.Join(dataDir, dataDirLockFile)
 57	release, err := lock.TryFile(path)
 58	if err != nil {
 59		if errors.Is(err, lock.ErrContended) {
 60			return nil, contendedLockError(dataDir, path)
 61		}
 62		return nil, fmt.Errorf("failed to lock data directory %q: %w", dataDir, err)
 63	}
 64
 65	// Record ownership metadata so a contending process can identify
 66	// us. Failures here are non-fatal: the OS-level lock is what
 67	// actually guarantees mutual exclusion, and a missing/partial JSON
 68	// payload only degrades the diagnostic a contender prints.
 69	if err := writeOwnerInfo(path); err != nil {
 70		slog.Debug("Failed to write data-dir owner info", "path", path, "error", err)
 71	}
 72
 73	// The lock file itself is intentionally never unlinked. flock is
 74	// keyed by inode, not by path, and any close-then-unlink (or
 75	// unlink-then-close) ordering opens a window where two processes
 76	// can each hold a flock on a different inode that lives at the
 77	// same path. Leaving the file in place lets every acquirer see
 78	// the same inode and lets the kernel arbitrate correctly.
 79	return &dataDirLock{release: release}, nil
 80}
 81
 82// skipDataDirLock reports whether the data-dir lock should be bypassed.
 83func skipDataDirLock() bool {
 84	v, _ := strconv.ParseBool(os.Getenv("CRUSH_SKIP_DATADIR_LOCK"))
 85	return v
 86}
 87
 88// writeOwnerInfo truncates and rewrites the lock file with the current
 89// process's identifying information. It is called only after the lock
 90// is held.
 91func writeOwnerInfo(path string) error {
 92	info := dataDirOwnerInfo{
 93		PID:       os.Getpid(),
 94		Version:   version.Version,
 95		StartedAt: time.Now().UTC().Format(time.RFC3339),
 96	}
 97	payload, err := json.MarshalIndent(info, "", "  ")
 98	if err != nil {
 99		return err
100	}
101	payload = append(payload, '\n')
102	return os.WriteFile(path, payload, 0o600)
103}
104
105// readOwnerInfo returns the lock file's recorded owner, if it parses.
106// A missing or malformed file yields an empty struct and no error;
107// the caller decides what to surface to the user.
108func readOwnerInfo(path string) dataDirOwnerInfo {
109	raw, err := os.ReadFile(path)
110	if err != nil || len(raw) == 0 {
111		return dataDirOwnerInfo{}
112	}
113	var info dataDirOwnerInfo
114	_ = json.Unmarshal(raw, &info)
115	return info
116}
117
118// contendedLockError builds a wrapped ErrDataDirLocked annotated with
119// whatever owner metadata is currently in the lock file.
120func contendedLockError(dataDir, lockPath string) error {
121	info := readOwnerInfo(lockPath)
122	details := ""
123	switch {
124	case info.PID != 0 && info.StartedAt != "":
125		details = fmt.Sprintf(" (owner pid=%d version=%s started_at=%s)",
126			info.PID, info.Version, info.StartedAt)
127	case info.PID != 0:
128		details = fmt.Sprintf(" (owner pid=%d)", info.PID)
129	}
130	return fmt.Errorf("%w: %s%s", ErrDataDirLocked, dataDir, details)
131}