datadirlock.go

  1package db
  2
  3import (
  4	"encoding/json"
  5	"errors"
  6	"fmt"
  7	"log/slog"
  8	"os"
  9	"path/filepath"
 10	"strconv"
 11	"time"
 12
 13	"github.com/charmbracelet/crush/internal/version"
 14)
 15
// ErrDataDirLocked is returned by Connect when the data directory is
// already in use by another crush process.
//
// Within this file the sentinel is never returned bare: contendedLockError
// wraps it with the data directory path and any owner details it could
// read, so callers should test for it with errors.Is rather than ==.
var ErrDataDirLocked = errors.New("data directory already in use by another crush process")
 19
// dataDirLockFile is the name of the lock file inside the data
// directory. It lives next to crush.db so users can `ls` and find it.
// acquireDataDirLock joins this name onto the data directory to obtain
// the path that is locked and that carries the owner-info JSON.
const dataDirLockFile = "crush.lock"
 23
// dataDirOwnerInfo is the JSON payload written into the lock file by
// the process that currently owns it. It is purely informational; the
// authoritative state of ownership is the operating system flock on
// the file descriptor.
type dataDirOwnerInfo struct {
	// PID is the process ID of the owner, as reported by os.Getpid when
	// the payload was written. Readers treat 0 as "no owner recorded".
	PID int `json:"pid"`
	// Version is the crush version string of the owner (version.Version).
	// It is omitted from the JSON when empty.
	Version string `json:"version,omitempty"`
	// StartedAt is the UTC time at which the owner wrote the payload,
	// formatted as RFC 3339. It is omitted from the JSON when empty.
	StartedAt string `json:"started_at,omitempty"`
}
 33
// dataDirLock represents an acquired exclusive lock on a data
// directory. release closes the underlying file descriptor which the
// kernel uses to drop the OS-level lock.
type dataDirLock struct {
	// release is the function handed back by tryFileLock on a successful
	// acquisition, or a no-op when locking was bypassed through
	// CRUSH_SKIP_DATADIR_LOCK.
	release func()
}
 40
 41// acquireDataDirLock takes an exclusive non-blocking lock on
 42// {dataDir}/crush.lock. If the lock is already held by another
 43// process, it returns ErrDataDirLocked wrapped with a diagnostic that
 44// includes whatever owner info that process wrote.
 45//
 46// Acquisition is skipped (returning a no-op lock) when
 47// CRUSH_SKIP_DATADIR_LOCK is set to a truthy value. This is intended
 48// as an escape hatch for hostile filesystems that do not implement
 49// advisory locking; it should not be used in normal operation.
 50func acquireDataDirLock(dataDir string) (*dataDirLock, error) {
 51	if skipDataDirLock() {
 52		return &dataDirLock{release: func() {}}, nil
 53	}
 54
 55	path := filepath.Join(dataDir, dataDirLockFile)
 56	release, err := tryFileLock(path)
 57	if err != nil {
 58		if errors.Is(err, errLockContended) {
 59			return nil, contendedLockError(dataDir, path)
 60		}
 61		return nil, fmt.Errorf("failed to lock data directory %q: %w", dataDir, err)
 62	}
 63
 64	// Record ownership metadata so a contending process can identify
 65	// us. Failures here are non-fatal: the OS-level lock is what
 66	// actually guarantees mutual exclusion, and a missing/partial JSON
 67	// payload only degrades the diagnostic a contender prints.
 68	if err := writeOwnerInfo(path); err != nil {
 69		slog.Debug("Failed to write data-dir owner info", "path", path, "error", err)
 70	}
 71
 72	// The lock file itself is intentionally never unlinked. flock is
 73	// keyed by inode, not by path, and any close-then-unlink (or
 74	// unlink-then-close) ordering opens a window where two processes
 75	// can each hold a flock on a different inode that lives at the
 76	// same path. Leaving the file in place lets every acquirer see
 77	// the same inode and lets the kernel arbitrate correctly.
 78	return &dataDirLock{release: release}, nil
 79}
 80
 81// skipDataDirLock reports whether the data-dir lock should be bypassed.
 82func skipDataDirLock() bool {
 83	v, _ := strconv.ParseBool(os.Getenv("CRUSH_SKIP_DATADIR_LOCK"))
 84	return v
 85}
 86
 87// writeOwnerInfo truncates and rewrites the lock file with the current
 88// process's identifying information. It is called only after the lock
 89// is held.
 90func writeOwnerInfo(path string) error {
 91	info := dataDirOwnerInfo{
 92		PID:       os.Getpid(),
 93		Version:   version.Version,
 94		StartedAt: time.Now().UTC().Format(time.RFC3339),
 95	}
 96	payload, err := json.MarshalIndent(info, "", "  ")
 97	if err != nil {
 98		return err
 99	}
100	payload = append(payload, '\n')
101	return os.WriteFile(path, payload, 0o600)
102}
103
104// readOwnerInfo returns the lock file's recorded owner, if it parses.
105// A missing or malformed file yields an empty struct and no error;
106// the caller decides what to surface to the user.
107func readOwnerInfo(path string) dataDirOwnerInfo {
108	raw, err := os.ReadFile(path)
109	if err != nil || len(raw) == 0 {
110		return dataDirOwnerInfo{}
111	}
112	var info dataDirOwnerInfo
113	_ = json.Unmarshal(raw, &info)
114	return info
115}
116
117// contendedLockError builds a wrapped ErrDataDirLocked annotated with
118// whatever owner metadata is currently in the lock file.
119func contendedLockError(dataDir, lockPath string) error {
120	info := readOwnerInfo(lockPath)
121	details := ""
122	switch {
123	case info.PID != 0 && info.StartedAt != "":
124		details = fmt.Sprintf(" (owner pid=%d version=%s started_at=%s)",
125			info.PID, info.Version, info.StartedAt)
126	case info.PID != 0:
127		details = fmt.Sprintf(" (owner pid=%d)", info.PID)
128	}
129	return fmt.Errorf("%w: %s%s", ErrDataDirLocked, dataDir, details)
130}