From 81170ce5d33280947e889ba8afb93a057d26b801 Mon Sep 17 00:00:00 2001 From: Kieran Klukas Date: Fri, 29 May 2026 14:38:28 -0400 Subject: [PATCH] refactor(config): replace reload booleans with reloadMu mutex Replace autoReloadDisabled and reloadInProgress boolean guards with a sync.Mutex (reloadMu) to serialize ReloadFromDisk calls. autoReload uses TryLock to skip redundant reloads when one is already in progress, handling both concurrent auto-reloads after parallel writes and re-entrant calls from configureProviders during a reload. The autoReload comment now honestly documents the lost-update window: if a write completes after the in-progress reload has already read the config file, that change won't appear in memory until the next reload. Callers needing guaranteed freshness should call ReloadFromDisk. The load.go RemoveConfigField comment is corrected to explain that Providers.Del keeps in-memory state consistent, not reload magic. Also fixes an aliasing bug where append could mutate the package-level defaultContextPaths slice by cloning it before prepending. --- internal/agent/coordinator.go | 2 +- internal/config/load.go | 18 ++++++++------- internal/config/store.go | 43 +++++++++++++++++++++++------------ internal/config/store_test.go | 12 +++------- 4 files changed, 42 insertions(+), 33 deletions(-) diff --git a/internal/agent/coordinator.go b/internal/agent/coordinator.go index 2a75898ddd175198c245b0c26e577a8185796861..152f008c20014174e9f528b29f55147f42d66590 100644 --- a/internal/agent/coordinator.go +++ b/internal/agent/coordinator.go @@ -260,7 +260,7 @@ func (c *coordinator) Run(ctx context.Context, sessionID string, prompt string, } beforeLoaded := c.skillTracker.LoadedNames() var result *fantasy.AgentResult - var originalErr error = c.runWithUnauthorizedRetry(ctx, providerCfg, func() error { + originalErr := c.runWithUnauthorizedRetry(ctx, providerCfg, func() error { var err error result, err = run() return err diff --git a/internal/config/load.go b/internal/config/load.go index 599dcc257bb8c84436a04ad236c6a32f0ce22c0d..85c760e3e0f34f60117916c5bab2a9d0dd3b108b 100644 --- a/internal/config/load.go +++ b/internal/config/load.go @@ -108,10 +108,10 @@ func Load(workingDir, dataDir string, debug bool) (*ConfigStore, error) { valueResolver := NewShellVariableResolver(env) store.resolver = valueResolver - // Disable auto-reload during initial load to prevent nested calls from - // config-modifying operations inside configureProviders. - store.autoReloadDisabled = true - defer func() { store.autoReloadDisabled = false }() + // Hold reloadMu during initial load to prevent configureProviders + // from triggering auto-reload via RemoveConfigField. + store.reloadMu.Lock() + defer store.reloadMu.Unlock() if err := cfg.configureProviders(store, env, valueResolver, store.knownProviders); err != nil { return nil, fmt.Errorf("failed to configure providers: %w", err) @@ -266,9 +266,11 @@ func (c *Config) configureProviders(store *ConfigStore, env env.Env, resolver Va switch { case p.ID == catwalk.InferenceProviderAnthropic && config.OAuthToken != nil: // Claude Code subscription is not supported anymore. Remove to show onboarding. - if !store.reloadInProgress { - store.RemoveConfigField(ScopeGlobal, "providers.anthropic") - } + // RemoveConfigField persists the deletion to disk. The in-memory + // state is kept consistent by the Providers.Del call below; any + // concurrent reload that races with this write will also see the + // removal because it re-reads from disk. + store.RemoveConfigField(ScopeGlobal, "providers.anthropic") c.Providers.Del(string(p.ID)) continue case p.ID == catwalk.InferenceProviderCopilot && config.OAuthToken != nil: @@ -445,7 +447,7 @@ func (c *Config) setDefaults(workingDir, dataDir string) { c.applyLSPDefaults() // Add the default context paths if they are not already present - c.Options.ContextPaths = append(defaultContextPaths, c.Options.ContextPaths...) + c.Options.ContextPaths = append(slices.Clone(defaultContextPaths), c.Options.ContextPaths...) slices.Sort(c.Options.ContextPaths) c.Options.ContextPaths = slices.Compact(c.Options.ContextPaths) diff --git a/internal/config/store.go b/internal/config/store.go index 9d9cef7d1b368251b03a495d5f0a3c8e44b17b5a..9dd8ddd9596075faf857012e7b6faf1ddda1e142 100644 --- a/internal/config/store.go +++ b/internal/config/store.go @@ -50,6 +50,10 @@ type RuntimeOverrides struct { // RemoveConfigField, RefreshOAuthToken) to prevent both in-process // goroutine races and, together with the shared lock.File, cross-process // races on the config file. +// +// reloadMu serialises ReloadFromDisk calls to prevent concurrent reloads +// from racing on store fields. autoReload uses TryLock on reloadMu to +// skip redundant reloads when one is already in progress. type ConfigStore struct { config *Config workingDir string @@ -61,10 +65,9 @@ type ConfigStore struct { overrides RuntimeOverrides trackedConfigPaths []string // unique, normalized config file paths snapshots map[string]fileSnapshot // path -> snapshot at last capture - autoReloadDisabled bool // set during load/reload to prevent re-entrancy - reloadInProgress bool // set during reload to avoid disk writes mid-reload - mu sync.Mutex + mu sync.Mutex // serialises config file writes + reloadMu sync.Mutex // serialises ReloadFromDisk calls } // Config returns the pure-data config struct (read-only after load). @@ -720,19 +723,18 @@ func (s *ConfigStore) captureStalenessSnapshot(paths []string) { // ReloadFromDisk re-runs the config load/merge flow and updates the in-memory // config atomically. It rebuilds the staleness snapshot after successful reload. // On failure, the store state is rolled back to its previous state. +// Concurrent calls are serialised via reloadMu. func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error { if s.workingDir == "" { return fmt.Errorf("cannot reload: working directory not set") } + s.reloadMu.Lock() + defer s.reloadMu.Unlock() + return s.reloadFromDiskLocked(ctx) +} - // Disable auto-reload during reload to prevent nested/re-entrant calls. - s.autoReloadDisabled = true - s.reloadInProgress = true - defer func() { - s.autoReloadDisabled = false - s.reloadInProgress = false - }() - +// reloadFromDiskLocked performs the actual reload. Caller must hold reloadMu. +func (s *ConfigStore) reloadFromDiskLocked(ctx context.Context) error { // Migrate deprecated disable_notifications before reloading config. migrateDisableNotifications() @@ -835,11 +837,22 @@ func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error { // disabled during load/reload flows, or when working directory is not set // (e.g., during testing). Only actual reload failures return an error. func (s *ConfigStore) autoReload(ctx context.Context) error { - if s.autoReloadDisabled { - return nil // Expected skip: already in load/reload flow - } if s.workingDir == "" { return nil // Expected skip: working directory not set } - return s.ReloadFromDisk(ctx) + // Skip if a reload is already in progress. This handles both + // concurrent auto-reloads after parallel writes and re-entrant + // calls from configureProviders during a reload. + // + // Note: if a write completes after the in-progress reload has + // already read the config file, that write won't be reflected in + // memory until the next reload. This is acceptable because writes + // are rare and the next user action or file-watch tick will pick + // up the change. Callers that need guaranteed fresh state after a + // write should call ReloadFromDisk explicitly. + if !s.reloadMu.TryLock() { + return nil + } + defer s.reloadMu.Unlock() + return s.reloadFromDiskLocked(ctx) } diff --git a/internal/config/store_test.go b/internal/config/store_test.go index 5094f668896723d7eddf3907e400f1250b8f46ec..1e885a9dabb37b050849d78d0eb11f560c077264 100644 --- a/internal/config/store_test.go +++ b/internal/config/store_test.go @@ -497,27 +497,21 @@ func TestAutoReloadDisabledDuringReload(t *testing.T) { }` require.NoError(t, os.WriteFile(configPath, []byte(initialConfig), 0o600)) - // Load will trigger configureProviders which removes anthropic OAuth config - // This should NOT cause infinite recursion thanks to autoReloadDisabled guard + // Load will trigger configureProviders which removes anthropic OAuth config. + // This should NOT cause infinite recursion — reloadMu prevents re-entrant reloads. store, err := Load(dir, dir, false) require.NoError(t, err) - // Verify the store loaded successfully and autoReloadDisabled was unset - require.False(t, store.autoReloadDisabled) - // Capture snapshot and verify reload also works without recursion store.globalDataPath = configPath store.CaptureStalenessSnapshot([]string{configPath}) - // Modify file and reload - this should work without re-entrancy issues + // Modify file and reload — this should work without re-entrancy issues time.Sleep(10 * time.Millisecond) require.NoError(t, os.WriteFile(configPath, []byte(`{"options": {"debug": true}}`), 0o600)) err = store.ReloadFromDisk(context.Background()) require.NoError(t, err) - - // Verify reload completed successfully - require.False(t, store.autoReloadDisabled, "autoReloadDisabled should be false after ReloadFromDisk") } // TestSetConfigFields_AutoReloadsAtomically verifies that SetConfigFields writes