Detailed changes
@@ -260,7 +260,7 @@ func (c *coordinator) Run(ctx context.Context, sessionID string, prompt string,
}
beforeLoaded := c.skillTracker.LoadedNames()
var result *fantasy.AgentResult
- var originalErr error = c.runWithUnauthorizedRetry(ctx, providerCfg, func() error {
+ originalErr := c.runWithUnauthorizedRetry(ctx, providerCfg, func() error {
var err error
result, err = run()
return err
@@ -108,10 +108,10 @@ func Load(workingDir, dataDir string, debug bool) (*ConfigStore, error) {
valueResolver := NewShellVariableResolver(env)
store.resolver = valueResolver
- // Disable auto-reload during initial load to prevent nested calls from
- // config-modifying operations inside configureProviders.
- store.autoReloadDisabled = true
- defer func() { store.autoReloadDisabled = false }()
+ // Hold reloadMu during initial load to prevent configureProviders
+ // from triggering auto-reload via RemoveConfigField.
+ store.reloadMu.Lock()
+ defer store.reloadMu.Unlock()
if err := cfg.configureProviders(store, env, valueResolver, store.knownProviders); err != nil {
return nil, fmt.Errorf("failed to configure providers: %w", err)
@@ -266,9 +266,11 @@ func (c *Config) configureProviders(store *ConfigStore, env env.Env, resolver Va
switch {
case p.ID == catwalk.InferenceProviderAnthropic && config.OAuthToken != nil:
// Claude Code subscription is not supported anymore. Remove to show onboarding.
- if !store.reloadInProgress {
- store.RemoveConfigField(ScopeGlobal, "providers.anthropic")
- }
+ // RemoveConfigField persists the deletion to disk. The in-memory
+ // state is kept consistent by the Providers.Del call below; any
+ // concurrent reload that races with this write will also see the
+ // removal because it re-reads from disk.
+ store.RemoveConfigField(ScopeGlobal, "providers.anthropic")
c.Providers.Del(string(p.ID))
continue
case p.ID == catwalk.InferenceProviderCopilot && config.OAuthToken != nil:
@@ -445,7 +447,7 @@ func (c *Config) setDefaults(workingDir, dataDir string) {
c.applyLSPDefaults()
// Add the default context paths if they are not already present
- c.Options.ContextPaths = append(defaultContextPaths, c.Options.ContextPaths...)
+ c.Options.ContextPaths = append(slices.Clone(defaultContextPaths), c.Options.ContextPaths...)
slices.Sort(c.Options.ContextPaths)
c.Options.ContextPaths = slices.Compact(c.Options.ContextPaths)
@@ -50,6 +50,10 @@ type RuntimeOverrides struct {
// RemoveConfigField, RefreshOAuthToken) to prevent both in-process
// goroutine races and, together with the shared lock.File, cross-process
// races on the config file.
+//
+// reloadMu serialises ReloadFromDisk calls to prevent concurrent reloads
+// from racing on store fields. autoReload uses TryLock on reloadMu to
+// skip redundant reloads when one is already in progress.
type ConfigStore struct {
config *Config
workingDir string
@@ -61,10 +65,9 @@ type ConfigStore struct {
overrides RuntimeOverrides
trackedConfigPaths []string // unique, normalized config file paths
snapshots map[string]fileSnapshot // path -> snapshot at last capture
- autoReloadDisabled bool // set during load/reload to prevent re-entrancy
- reloadInProgress bool // set during reload to avoid disk writes mid-reload
- mu sync.Mutex
+ mu sync.Mutex // serialises config file writes
+ reloadMu sync.Mutex // serialises ReloadFromDisk calls
}
// Config returns the pure-data config struct (read-only after load).
@@ -720,19 +723,18 @@ func (s *ConfigStore) captureStalenessSnapshot(paths []string) {
// ReloadFromDisk re-runs the config load/merge flow and updates the in-memory
// config atomically. It rebuilds the staleness snapshot after successful reload.
// On failure, the store state is rolled back to its previous state.
+// Concurrent calls are serialised via reloadMu.
func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error {
if s.workingDir == "" {
return fmt.Errorf("cannot reload: working directory not set")
}
+ s.reloadMu.Lock()
+ defer s.reloadMu.Unlock()
+ return s.reloadFromDiskLocked(ctx)
+}
- // Disable auto-reload during reload to prevent nested/re-entrant calls.
- s.autoReloadDisabled = true
- s.reloadInProgress = true
- defer func() {
- s.autoReloadDisabled = false
- s.reloadInProgress = false
- }()
-
+// reloadFromDiskLocked performs the actual reload. Caller must hold reloadMu.
+func (s *ConfigStore) reloadFromDiskLocked(ctx context.Context) error {
// Migrate deprecated disable_notifications before reloading config.
migrateDisableNotifications()
@@ -835,11 +837,22 @@ func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error {
// disabled during load/reload flows, or when working directory is not set
// (e.g., during testing). Only actual reload failures return an error.
func (s *ConfigStore) autoReload(ctx context.Context) error {
- if s.autoReloadDisabled {
- return nil // Expected skip: already in load/reload flow
- }
if s.workingDir == "" {
return nil // Expected skip: working directory not set
}
- return s.ReloadFromDisk(ctx)
+ // Skip if a reload is already in progress. This handles both
+ // concurrent auto-reloads after parallel writes and re-entrant
+ // calls from configureProviders during a reload.
+ //
+ // Note: if a write completes after the in-progress reload has
+ // already read the config file, that write won't be reflected in
+ // memory until the next reload. This is acceptable because writes
+ // are rare and the next user action or file-watch tick will pick
+ // up the change. Callers that need guaranteed fresh state after a
+ // write should call ReloadFromDisk explicitly.
+ if !s.reloadMu.TryLock() {
+ return nil
+ }
+ defer s.reloadMu.Unlock()
+ return s.reloadFromDiskLocked(ctx)
}
@@ -497,27 +497,21 @@ func TestAutoReloadDisabledDuringReload(t *testing.T) {
}`
require.NoError(t, os.WriteFile(configPath, []byte(initialConfig), 0o600))
- // Load will trigger configureProviders which removes anthropic OAuth config
- // This should NOT cause infinite recursion thanks to autoReloadDisabled guard
+ // Load will trigger configureProviders which removes anthropic OAuth config.
+ // This should NOT cause infinite recursion — reloadMu prevents re-entrant reloads.
store, err := Load(dir, dir, false)
require.NoError(t, err)
- // Verify the store loaded successfully and autoReloadDisabled was unset
- require.False(t, store.autoReloadDisabled)
-
// Capture snapshot and verify reload also works without recursion
store.globalDataPath = configPath
store.CaptureStalenessSnapshot([]string{configPath})
- // Modify file and reload - this should work without re-entrancy issues
+ // Modify file and reload — this should work without re-entrancy issues
time.Sleep(10 * time.Millisecond)
require.NoError(t, os.WriteFile(configPath, []byte(`{"options": {"debug": true}}`), 0o600))
err = store.ReloadFromDisk(context.Background())
require.NoError(t, err)
-
- // Verify reload completed successfully
- require.False(t, store.autoReloadDisabled, "autoReloadDisabled should be false after ReloadFromDisk")
}
// TestSetConfigFields_AutoReloadsAtomically verifies that SetConfigFields writes