diff --git a/internal/backend/backend.go b/internal/backend/backend.go index 8b78f85fb1c06114ec4bafc55371c82f623e2bf7..55e3a2785a77de4ab849cc367eb189abae81dbdb 100644 --- a/internal/backend/backend.go +++ b/internal/backend/backend.go @@ -223,7 +223,7 @@ func (b *Backend) CreateWorkspace(args proto.Workspace) (*Workspace, proto.Works return nil, proto.Workspace{}, fmt.Errorf("failed to create data directory: %w", err) } - conn, err := db.Connect(b.ctx, cfg.Config().Options.DataDirectory) + conn, err := db.Connect(b.ctx, cfg.Config().Options.DataDirectory, db.WithDataDirLock(true)) if err != nil { return nil, proto.Workspace{}, fmt.Errorf("failed to connect to database: %w", err) } diff --git a/internal/db/connect.go b/internal/db/connect.go index 1ed0f69a45a9526f9dce25257b531a97ad73d8c6..706fdab1216beac1c92db01c3702d2366725a3ee 100644 --- a/internal/db/connect.go +++ b/internal/db/connect.go @@ -56,16 +56,39 @@ var ( poolMu sync.Mutex ) +// ConnectOption configures a Connect call. Options are applied in +// order; later options override earlier ones for the same field. +type ConnectOption func(*connectOptions) + +// connectOptions holds the resolved configuration for a Connect call. +type connectOptions struct { + lockDataDir bool +} + +// WithDataDirLock toggles acquisition of the per-data-directory lock +// for this Connect call. The lock is off by default so local-mode +// invocations do not regress today's behavior; the server's +// workspace-bootstrap path opts in. CRUSH_SKIP_DATADIR_LOCK still +// bypasses acquisition even when this option is set. +func WithDataDirLock(enable bool) ConnectOption { + return func(o *connectOptions) { o.lockDataDir = enable } +} + // Connect opens a SQLite database connection for the given data // directory and runs migrations. If a connection to the same database // file already exists, the existing connection is returned with its // reference count incremented. Callers must pair each Connect with a // [Release] when they no longer need the connection. -func Connect(ctx context.Context, dataDir string) (*sql.DB, error) { +func Connect(ctx context.Context, dataDir string, opts ...ConnectOption) (*sql.DB, error) { if dataDir == "" { return nil, fmt.Errorf("data.dir is not set") } + var cfg connectOptions + for _, opt := range opts { + opt(&cfg) + } + dbPath := filepath.Join(dataDir, "crush.db") // Resolve to an absolute path so that different relative paths to @@ -88,18 +111,25 @@ func Connect(ctx context.Context, dataDir string) (*sql.DB, error) { // crush process on the same SQLite file. The lock is released when // the matching Release call drops the refcount to zero. Ensuring // the data directory exists is required because the lock file - // lives inside it. + // lives inside it. Locking is opt-in via WithDataDirLock so that + // local-mode invocations do not refuse a second crush against the + // same data dir until client/server becomes the default. if err := os.MkdirAll(dataDir, 0o700); err != nil { return nil, fmt.Errorf("failed to create data directory %q: %w", dataDir, err) } - lock, err := acquireDataDirLock(dataDir) - if err != nil { - return nil, err + var lock *dataDirLock + if cfg.lockDataDir && !skipDataDirLock() { + lock, err = acquireDataDirLock(dataDir) + if err != nil { + return nil, err + } } conn, err := openDB(dbPath) if err != nil { - lock.release() + if lock != nil { + lock.release() + } return nil, err } @@ -110,22 +140,28 @@ func Connect(ctx context.Context, dataDir string) (*sql.DB, error) { // resulting in SQLITE_NOTADB (26) on the next open. conn.SetMaxOpenConns(1) + releaseLock := func() { + if lock != nil { + lock.release() + } + } + if err = conn.PingContext(ctx); err != nil { conn.Close() - lock.release() + releaseLock() return nil, fmt.Errorf("failed to connect to database: %w", err) } if err := initGoose(); err != nil { conn.Close() - lock.release() + releaseLock() slog.Error("Failed to initialize goose", "error", err) return nil, fmt.Errorf("failed to initialize goose: %w", err) } if err := goose.Up(conn, "migrations"); err != nil { conn.Close() - lock.release() + releaseLock() slog.Error("Failed to apply migrations", "error", err) return nil, fmt.Errorf("failed to apply migrations: %w", err) } diff --git a/internal/db/connect_test.go b/internal/db/connect_test.go index d3b7fc86351d2cf43300d683fc9df0ad77b639b6..45e39758924a9351b07bdb5956ddbf1ae85d1b02 100644 --- a/internal/db/connect_test.go +++ b/internal/db/connect_test.go @@ -69,7 +69,7 @@ func TestConnect_FailsWhenDataDirLocked(t *testing.T) { require.NoError(t, err, "expected to take the data-dir lock for the first time") t.Cleanup(release) - _, err = Connect(context.Background(), dataDir) + _, err = Connect(context.Background(), dataDir, WithDataDirLock(true)) require.Error(t, err, "Connect must refuse to open a locked data dir") require.ErrorIs(t, err, ErrDataDirLocked) } @@ -85,12 +85,12 @@ func TestConnect_SucceedsAfterContenderReleases(t *testing.T) { release, err := tryFileLock(lockPath) require.NoError(t, err) - _, err = Connect(context.Background(), dataDir) + _, err = Connect(context.Background(), dataDir, WithDataDirLock(true)) require.ErrorIs(t, err, ErrDataDirLocked) release() - conn, err := Connect(context.Background(), dataDir) + conn, err := Connect(context.Background(), dataDir, WithDataDirLock(true)) require.NoError(t, err, "Connect should succeed once the contender releases the lock") require.NoError(t, conn.PingContext(context.Background())) require.NoError(t, Release(dataDir)) @@ -105,7 +105,7 @@ func TestConnect_LockReleasedOnFinalRelease(t *testing.T) { dataDir := t.TempDir() lockPath := filepath.Join(dataDir, dataDirLockFile) - conn, err := Connect(context.Background(), dataDir) + conn, err := Connect(context.Background(), dataDir, WithDataDirLock(true)) require.NoError(t, err) require.NoError(t, conn.PingContext(context.Background())) @@ -135,10 +135,10 @@ func TestConnect_SharedPoolDoesNotReacquireLock(t *testing.T) { dataDir := t.TempDir() lockPath := filepath.Join(dataDir, dataDirLockFile) - _, err := Connect(context.Background(), dataDir) + _, err := Connect(context.Background(), dataDir, WithDataDirLock(true)) require.NoError(t, err) - _, err = Connect(context.Background(), dataDir) + _, err = Connect(context.Background(), dataDir, WithDataDirLock(true)) require.NoError(t, err) // Drop one reference; lock must still be held. @@ -163,8 +163,47 @@ func TestConnect_SkipLockEnvBypassesAcquisition(t *testing.T) { t.Setenv("CRUSH_SKIP_DATADIR_LOCK", "1") - conn, err := Connect(context.Background(), dataDir) + conn, err := Connect(context.Background(), dataDir, WithDataDirLock(true)) require.NoError(t, err, "skip-lock env should bypass contention") require.NoError(t, conn.PingContext(context.Background())) require.NoError(t, Release(dataDir)) } + +// TestConnect_DefaultIgnoresContendedLock confirms that without +// WithDataDirLock(true) the lock file is irrelevant: a contender can +// hold tryFileLock and Connect still succeeds. This pins the +// local-mode default to its pre-lock behavior. +func TestConnect_DefaultIgnoresContendedLock(t *testing.T) { + t.Cleanup(ResetPool) + + dataDir := t.TempDir() + lockPath := filepath.Join(dataDir, dataDirLockFile) + + release, err := tryFileLock(lockPath) + require.NoError(t, err, "expected to take the data-dir lock for the first time") + t.Cleanup(release) + + conn, err := Connect(context.Background(), dataDir) + require.NoError(t, err, "default Connect must not take the lock and must succeed under contention") + require.NoError(t, conn.PingContext(context.Background())) + require.NoError(t, Release(dataDir)) +} + +// TestConnect_ServerPathFailsWhenDataDirLocked is the server's +// workspace-bootstrap analogue of TestConnect_FailsWhenDataDirLocked: +// passing WithDataDirLock(true) must surface ErrDataDirLocked when a +// contender already holds the lock. +func TestConnect_ServerPathFailsWhenDataDirLocked(t *testing.T) { + t.Cleanup(ResetPool) + + dataDir := t.TempDir() + lockPath := filepath.Join(dataDir, dataDirLockFile) + + release, err := tryFileLock(lockPath) + require.NoError(t, err, "expected to take the data-dir lock for the first time") + t.Cleanup(release) + + _, err = Connect(context.Background(), dataDir, WithDataDirLock(true)) + require.Error(t, err, "server-path Connect must refuse to open a locked data dir") + require.ErrorIs(t, err, ErrDataDirLocked) +}