@@ -223,7 +223,7 @@ func (b *Backend) CreateWorkspace(args proto.Workspace) (*Workspace, proto.Works
return nil, proto.Workspace{}, fmt.Errorf("failed to create data directory: %w", err)
}
- conn, err := db.Connect(b.ctx, cfg.Config().Options.DataDirectory)
+ conn, err := db.Connect(b.ctx, cfg.Config().Options.DataDirectory, db.WithDataDirLock(true))
if err != nil {
return nil, proto.Workspace{}, fmt.Errorf("failed to connect to database: %w", err)
}
@@ -56,16 +56,39 @@ var (
poolMu sync.Mutex
)
+// ConnectOption configures a Connect call. Options are applied in
+// order; later options override earlier ones for the same field.
+type ConnectOption func(*connectOptions)
+
+// connectOptions holds the resolved configuration for a Connect call.
+type connectOptions struct {
+ lockDataDir bool
+}
+
+// WithDataDirLock toggles acquisition of the per-data-directory lock
+// for this Connect call. The lock is off by default so local-mode
+// invocations do not regress today's behavior; the server's
+// workspace-bootstrap path opts in. CRUSH_SKIP_DATADIR_LOCK still
+// bypasses acquisition even when this option is set.
+func WithDataDirLock(enable bool) ConnectOption {
+ return func(o *connectOptions) { o.lockDataDir = enable }
+}
+
// Connect opens a SQLite database connection for the given data
// directory and runs migrations. If a connection to the same database
// file already exists, the existing connection is returned with its
// reference count incremented. Callers must pair each Connect with a
// [Release] when they no longer need the connection.
-func Connect(ctx context.Context, dataDir string) (*sql.DB, error) {
+func Connect(ctx context.Context, dataDir string, opts ...ConnectOption) (*sql.DB, error) {
if dataDir == "" {
return nil, fmt.Errorf("data.dir is not set")
}
+ var cfg connectOptions
+ for _, opt := range opts {
+ opt(&cfg)
+ }
+
dbPath := filepath.Join(dataDir, "crush.db")
// Resolve to an absolute path so that different relative paths to
@@ -88,18 +111,25 @@ func Connect(ctx context.Context, dataDir string) (*sql.DB, error) {
// crush process on the same SQLite file. The lock is released when
// the matching Release call drops the refcount to zero. Ensuring
// the data directory exists is required because the lock file
- // lives inside it.
+ // lives inside it. Locking is opt-in via WithDataDirLock so that
+ // local-mode invocations do not refuse a second crush against the
+ // same data dir until client/server becomes the default.
if err := os.MkdirAll(dataDir, 0o700); err != nil {
return nil, fmt.Errorf("failed to create data directory %q: %w", dataDir, err)
}
- lock, err := acquireDataDirLock(dataDir)
- if err != nil {
- return nil, err
+ var lock *dataDirLock
+ if cfg.lockDataDir && !skipDataDirLock() {
+ lock, err = acquireDataDirLock(dataDir)
+ if err != nil {
+ return nil, err
+ }
}
conn, err := openDB(dbPath)
if err != nil {
- lock.release()
+ if lock != nil {
+ lock.release()
+ }
return nil, err
}
@@ -110,22 +140,28 @@ func Connect(ctx context.Context, dataDir string) (*sql.DB, error) {
// resulting in SQLITE_NOTADB (26) on the next open.
conn.SetMaxOpenConns(1)
+ releaseLock := func() {
+ if lock != nil {
+ lock.release()
+ }
+ }
+
if err = conn.PingContext(ctx); err != nil {
conn.Close()
- lock.release()
+ releaseLock()
return nil, fmt.Errorf("failed to connect to database: %w", err)
}
if err := initGoose(); err != nil {
conn.Close()
- lock.release()
+ releaseLock()
slog.Error("Failed to initialize goose", "error", err)
return nil, fmt.Errorf("failed to initialize goose: %w", err)
}
if err := goose.Up(conn, "migrations"); err != nil {
conn.Close()
- lock.release()
+ releaseLock()
slog.Error("Failed to apply migrations", "error", err)
return nil, fmt.Errorf("failed to apply migrations: %w", err)
}
@@ -69,7 +69,7 @@ func TestConnect_FailsWhenDataDirLocked(t *testing.T) {
require.NoError(t, err, "expected to take the data-dir lock for the first time")
t.Cleanup(release)
- _, err = Connect(context.Background(), dataDir)
+ _, err = Connect(context.Background(), dataDir, WithDataDirLock(true))
require.Error(t, err, "Connect must refuse to open a locked data dir")
require.ErrorIs(t, err, ErrDataDirLocked)
}
@@ -85,12 +85,12 @@ func TestConnect_SucceedsAfterContenderReleases(t *testing.T) {
release, err := tryFileLock(lockPath)
require.NoError(t, err)
- _, err = Connect(context.Background(), dataDir)
+ _, err = Connect(context.Background(), dataDir, WithDataDirLock(true))
require.ErrorIs(t, err, ErrDataDirLocked)
release()
- conn, err := Connect(context.Background(), dataDir)
+ conn, err := Connect(context.Background(), dataDir, WithDataDirLock(true))
require.NoError(t, err, "Connect should succeed once the contender releases the lock")
require.NoError(t, conn.PingContext(context.Background()))
require.NoError(t, Release(dataDir))
@@ -105,7 +105,7 @@ func TestConnect_LockReleasedOnFinalRelease(t *testing.T) {
dataDir := t.TempDir()
lockPath := filepath.Join(dataDir, dataDirLockFile)
- conn, err := Connect(context.Background(), dataDir)
+ conn, err := Connect(context.Background(), dataDir, WithDataDirLock(true))
require.NoError(t, err)
require.NoError(t, conn.PingContext(context.Background()))
@@ -135,10 +135,10 @@ func TestConnect_SharedPoolDoesNotReacquireLock(t *testing.T) {
dataDir := t.TempDir()
lockPath := filepath.Join(dataDir, dataDirLockFile)
- _, err := Connect(context.Background(), dataDir)
+ _, err := Connect(context.Background(), dataDir, WithDataDirLock(true))
require.NoError(t, err)
- _, err = Connect(context.Background(), dataDir)
+ _, err = Connect(context.Background(), dataDir, WithDataDirLock(true))
require.NoError(t, err)
// Drop one reference; lock must still be held.
@@ -163,8 +163,47 @@ func TestConnect_SkipLockEnvBypassesAcquisition(t *testing.T) {
t.Setenv("CRUSH_SKIP_DATADIR_LOCK", "1")
- conn, err := Connect(context.Background(), dataDir)
+ conn, err := Connect(context.Background(), dataDir, WithDataDirLock(true))
require.NoError(t, err, "skip-lock env should bypass contention")
require.NoError(t, conn.PingContext(context.Background()))
require.NoError(t, Release(dataDir))
}
+
+// TestConnect_DefaultIgnoresContendedLock confirms that without
+// WithDataDirLock(true) the lock file is irrelevant: a contender can
+// hold tryFileLock and Connect still succeeds. This pins the
+// local-mode default to its pre-lock behavior.
+func TestConnect_DefaultIgnoresContendedLock(t *testing.T) {
+ t.Cleanup(ResetPool)
+
+ dataDir := t.TempDir()
+ lockPath := filepath.Join(dataDir, dataDirLockFile)
+
+ release, err := tryFileLock(lockPath)
+ require.NoError(t, err, "expected to take the data-dir lock for the first time")
+ t.Cleanup(release)
+
+ conn, err := Connect(context.Background(), dataDir)
+ require.NoError(t, err, "default Connect must not take the lock and must succeed under contention")
+ require.NoError(t, conn.PingContext(context.Background()))
+ require.NoError(t, Release(dataDir))
+}
+
+// TestConnect_ServerPathFailsWhenDataDirLocked is the server's
+// workspace-bootstrap analogue of TestConnect_FailsWhenDataDirLocked:
+// passing WithDataDirLock(true) must surface ErrDataDirLocked when a
+// contender already holds the lock.
+func TestConnect_ServerPathFailsWhenDataDirLocked(t *testing.T) {
+ t.Cleanup(ResetPool)
+
+ dataDir := t.TempDir()
+ lockPath := filepath.Join(dataDir, dataDirLockFile)
+
+ release, err := tryFileLock(lockPath)
+ require.NoError(t, err, "expected to take the data-dir lock for the first time")
+ t.Cleanup(release)
+
+ _, err = Connect(context.Background(), dataDir, WithDataDirLock(true))
+ require.Error(t, err, "server-path Connect must refuse to open a locked data dir")
+ require.ErrorIs(t, err, ErrDataDirLocked)
+}