refactor(config): replace reload booleans with reloadMu mutex

Kieran Klukas created

Replace autoReloadDisabled and reloadInProgress boolean guards with a
sync.Mutex (reloadMu) to serialize ReloadFromDisk calls. autoReload
uses TryLock to skip redundant reloads when one is already in progress,
handling both concurrent auto-reloads after parallel writes and
re-entrant calls from configureProviders during a reload.

The autoReload comment now honestly documents the lost-update window:
if a write completes after the in-progress reload has already read the
config file, that change won't appear in memory until the next reload.
Callers needing guaranteed freshness should call ReloadFromDisk.

The load.go RemoveConfigField comment is corrected to explain that
Providers.Del keeps in-memory state consistent, not reload magic.

Also fixes an aliasing bug where append could mutate the package-level
defaultContextPaths slice by cloning it before prepending.

Change summary

internal/agent/coordinator.go |  2 
internal/config/load.go       | 18 ++++++++------
internal/config/store.go      | 43 ++++++++++++++++++++++++------------
internal/config/store_test.go | 12 ++-------
4 files changed, 42 insertions(+), 33 deletions(-)

Detailed changes

internal/agent/coordinator.go 🔗

@@ -260,7 +260,7 @@ func (c *coordinator) Run(ctx context.Context, sessionID string, prompt string,
 	}
 	beforeLoaded := c.skillTracker.LoadedNames()
 	var result *fantasy.AgentResult
-	var originalErr error = c.runWithUnauthorizedRetry(ctx, providerCfg, func() error {
+	originalErr := c.runWithUnauthorizedRetry(ctx, providerCfg, func() error {
 		var err error
 		result, err = run()
 		return err

internal/config/load.go 🔗

@@ -108,10 +108,10 @@ func Load(workingDir, dataDir string, debug bool) (*ConfigStore, error) {
 	valueResolver := NewShellVariableResolver(env)
 	store.resolver = valueResolver
 
-	// Disable auto-reload during initial load to prevent nested calls from
-	// config-modifying operations inside configureProviders.
-	store.autoReloadDisabled = true
-	defer func() { store.autoReloadDisabled = false }()
+	// Hold reloadMu during initial load to prevent configureProviders
+	// from triggering auto-reload via RemoveConfigField.
+	store.reloadMu.Lock()
+	defer store.reloadMu.Unlock()
 
 	if err := cfg.configureProviders(store, env, valueResolver, store.knownProviders); err != nil {
 		return nil, fmt.Errorf("failed to configure providers: %w", err)
@@ -266,9 +266,11 @@ func (c *Config) configureProviders(store *ConfigStore, env env.Env, resolver Va
 		switch {
 		case p.ID == catwalk.InferenceProviderAnthropic && config.OAuthToken != nil:
 			// Claude Code subscription is not supported anymore. Remove to show onboarding.
-			if !store.reloadInProgress {
-				store.RemoveConfigField(ScopeGlobal, "providers.anthropic")
-			}
+			// RemoveConfigField persists the deletion to disk. The in-memory
+			// state is kept consistent by the Providers.Del call below; any
+			// concurrent reload that races with this write will also see the
+			// removal because it re-reads from disk.
+			store.RemoveConfigField(ScopeGlobal, "providers.anthropic")
 			c.Providers.Del(string(p.ID))
 			continue
 		case p.ID == catwalk.InferenceProviderCopilot && config.OAuthToken != nil:
@@ -445,7 +447,7 @@ func (c *Config) setDefaults(workingDir, dataDir string) {
 	c.applyLSPDefaults()
 
 	// Add the default context paths if they are not already present
-	c.Options.ContextPaths = append(defaultContextPaths, c.Options.ContextPaths...)
+	c.Options.ContextPaths = append(slices.Clone(defaultContextPaths), c.Options.ContextPaths...)
 	slices.Sort(c.Options.ContextPaths)
 	c.Options.ContextPaths = slices.Compact(c.Options.ContextPaths)
 

internal/config/store.go 🔗

@@ -50,6 +50,10 @@ type RuntimeOverrides struct {
 // RemoveConfigField, RefreshOAuthToken) to prevent both in-process
 // goroutine races and, together with the shared lock.File, cross-process
 // races on the config file.
+//
+// reloadMu serialises ReloadFromDisk calls to prevent concurrent reloads
+// from racing on store fields. autoReload uses TryLock on reloadMu to
+// skip redundant reloads when one is already in progress.
 type ConfigStore struct {
 	config             *Config
 	workingDir         string
@@ -61,10 +65,9 @@ type ConfigStore struct {
 	overrides          RuntimeOverrides
 	trackedConfigPaths []string                // unique, normalized config file paths
 	snapshots          map[string]fileSnapshot // path -> snapshot at last capture
-	autoReloadDisabled bool                    // set during load/reload to prevent re-entrancy
-	reloadInProgress   bool                    // set during reload to avoid disk writes mid-reload
 
-	mu sync.Mutex
+	mu       sync.Mutex // serialises config file writes
+	reloadMu sync.Mutex // serialises ReloadFromDisk calls
 }
 
 // Config returns the pure-data config struct (read-only after load).
@@ -720,19 +723,18 @@ func (s *ConfigStore) captureStalenessSnapshot(paths []string) {
 // ReloadFromDisk re-runs the config load/merge flow and updates the in-memory
 // config atomically. It rebuilds the staleness snapshot after successful reload.
 // On failure, the store state is rolled back to its previous state.
+// Concurrent calls are serialised via reloadMu.
 func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error {
 	if s.workingDir == "" {
 		return fmt.Errorf("cannot reload: working directory not set")
 	}
+	s.reloadMu.Lock()
+	defer s.reloadMu.Unlock()
+	return s.reloadFromDiskLocked(ctx)
+}
 
-	// Disable auto-reload during reload to prevent nested/re-entrant calls.
-	s.autoReloadDisabled = true
-	s.reloadInProgress = true
-	defer func() {
-		s.autoReloadDisabled = false
-		s.reloadInProgress = false
-	}()
-
+// reloadFromDiskLocked performs the actual reload. Caller must hold reloadMu.
+func (s *ConfigStore) reloadFromDiskLocked(ctx context.Context) error {
 	// Migrate deprecated disable_notifications before reloading config.
 	migrateDisableNotifications()
 
@@ -835,11 +837,22 @@ func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error {
 // disabled during load/reload flows, or when working directory is not set
 // (e.g., during testing). Only actual reload failures return an error.
 func (s *ConfigStore) autoReload(ctx context.Context) error {
-	if s.autoReloadDisabled {
-		return nil // Expected skip: already in load/reload flow
-	}
 	if s.workingDir == "" {
 		return nil // Expected skip: working directory not set
 	}
-	return s.ReloadFromDisk(ctx)
+	// Skip if a reload is already in progress. This handles both
+	// concurrent auto-reloads after parallel writes and re-entrant
+	// calls from configureProviders during a reload.
+	//
+	// Note: if a write completes after the in-progress reload has
+	// already read the config file, that write won't be reflected in
+	// memory until the next reload. This is acceptable because writes
+	// are rare and the next user action or file-watch tick will pick
+	// up the change. Callers that need guaranteed fresh state after a
+	// write should call ReloadFromDisk explicitly.
+	if !s.reloadMu.TryLock() {
+		return nil
+	}
+	defer s.reloadMu.Unlock()
+	return s.reloadFromDiskLocked(ctx)
 }

internal/config/store_test.go 🔗

@@ -497,27 +497,21 @@ func TestAutoReloadDisabledDuringReload(t *testing.T) {
 	}`
 	require.NoError(t, os.WriteFile(configPath, []byte(initialConfig), 0o600))
 
-	// Load will trigger configureProviders which removes anthropic OAuth config
-	// This should NOT cause infinite recursion thanks to autoReloadDisabled guard
+	// Load will trigger configureProviders which removes anthropic OAuth config.
+	// This should NOT cause infinite recursion — reloadMu prevents re-entrant reloads.
 	store, err := Load(dir, dir, false)
 	require.NoError(t, err)
 
-	// Verify the store loaded successfully and autoReloadDisabled was unset
-	require.False(t, store.autoReloadDisabled)
-
 	// Capture snapshot and verify reload also works without recursion
 	store.globalDataPath = configPath
 	store.CaptureStalenessSnapshot([]string{configPath})
 
-	// Modify file and reload - this should work without re-entrancy issues
+	// Modify file and reload — this should work without re-entrancy issues
 	time.Sleep(10 * time.Millisecond)
 	require.NoError(t, os.WriteFile(configPath, []byte(`{"options": {"debug": true}}`), 0o600))
 
 	err = store.ReloadFromDisk(context.Background())
 	require.NoError(t, err)
-
-	// Verify reload completed successfully
-	require.False(t, store.autoReloadDisabled, "autoReloadDisabled should be false after ReloadFromDisk")
 }
 
 // TestSetConfigFields_AutoReloadsAtomically verifies that SetConfigFields writes