diff --git a/internal/agent/coordinator.go b/internal/agent/coordinator.go index 2a75898ddd175198c245b0c26e577a8185796861..152f008c20014174e9f528b29f55147f42d66590 100644 --- a/internal/agent/coordinator.go +++ b/internal/agent/coordinator.go @@ -260,7 +260,7 @@ func (c *coordinator) Run(ctx context.Context, sessionID string, prompt string, } beforeLoaded := c.skillTracker.LoadedNames() var result *fantasy.AgentResult - var originalErr error = c.runWithUnauthorizedRetry(ctx, providerCfg, func() error { + originalErr := c.runWithUnauthorizedRetry(ctx, providerCfg, func() error { var err error result, err = run() return err diff --git a/internal/config/load.go b/internal/config/load.go index 599dcc257bb8c84436a04ad236c6a32f0ce22c0d..85c760e3e0f34f60117916c5bab2a9d0dd3b108b 100644 --- a/internal/config/load.go +++ b/internal/config/load.go @@ -108,10 +108,10 @@ func Load(workingDir, dataDir string, debug bool) (*ConfigStore, error) { valueResolver := NewShellVariableResolver(env) store.resolver = valueResolver - // Disable auto-reload during initial load to prevent nested calls from - // config-modifying operations inside configureProviders. - store.autoReloadDisabled = true - defer func() { store.autoReloadDisabled = false }() + // Hold reloadMu during initial load to prevent configureProviders + // from triggering auto-reload via RemoveConfigField. + store.reloadMu.Lock() + defer store.reloadMu.Unlock() if err := cfg.configureProviders(store, env, valueResolver, store.knownProviders); err != nil { return nil, fmt.Errorf("failed to configure providers: %w", err) @@ -266,9 +266,11 @@ func (c *Config) configureProviders(store *ConfigStore, env env.Env, resolver Va switch { case p.ID == catwalk.InferenceProviderAnthropic && config.OAuthToken != nil: // Claude Code subscription is not supported anymore. Remove to show onboarding. - if !store.reloadInProgress { - store.RemoveConfigField(ScopeGlobal, "providers.anthropic") - } + // RemoveConfigField persists the deletion to disk. The in-memory + // state is kept consistent by the Providers.Del call below; any + // concurrent reload that races with this write will also see the + // removal because it re-reads from disk. + store.RemoveConfigField(ScopeGlobal, "providers.anthropic") c.Providers.Del(string(p.ID)) continue case p.ID == catwalk.InferenceProviderCopilot && config.OAuthToken != nil: @@ -445,7 +447,7 @@ func (c *Config) setDefaults(workingDir, dataDir string) { c.applyLSPDefaults() // Add the default context paths if they are not already present - c.Options.ContextPaths = append(defaultContextPaths, c.Options.ContextPaths...) + c.Options.ContextPaths = append(slices.Clone(defaultContextPaths), c.Options.ContextPaths...) slices.Sort(c.Options.ContextPaths) c.Options.ContextPaths = slices.Compact(c.Options.ContextPaths) diff --git a/internal/config/store.go b/internal/config/store.go index 9d9cef7d1b368251b03a495d5f0a3c8e44b17b5a..9dd8ddd9596075faf857012e7b6faf1ddda1e142 100644 --- a/internal/config/store.go +++ b/internal/config/store.go @@ -50,6 +50,10 @@ type RuntimeOverrides struct { // RemoveConfigField, RefreshOAuthToken) to prevent both in-process // goroutine races and, together with the shared lock.File, cross-process // races on the config file. +// +// reloadMu serialises ReloadFromDisk calls to prevent concurrent reloads +// from racing on store fields. autoReload uses TryLock on reloadMu to +// skip redundant reloads when one is already in progress. type ConfigStore struct { config *Config workingDir string @@ -61,10 +65,9 @@ type ConfigStore struct { overrides RuntimeOverrides trackedConfigPaths []string // unique, normalized config file paths snapshots map[string]fileSnapshot // path -> snapshot at last capture - autoReloadDisabled bool // set during load/reload to prevent re-entrancy - reloadInProgress bool // set during reload to avoid disk writes mid-reload - mu sync.Mutex + mu sync.Mutex // serialises config file writes + reloadMu sync.Mutex // serialises ReloadFromDisk calls } // Config returns the pure-data config struct (read-only after load). @@ -720,19 +723,18 @@ func (s *ConfigStore) captureStalenessSnapshot(paths []string) { // ReloadFromDisk re-runs the config load/merge flow and updates the in-memory // config atomically. It rebuilds the staleness snapshot after successful reload. // On failure, the store state is rolled back to its previous state. +// Concurrent calls are serialised via reloadMu. func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error { if s.workingDir == "" { return fmt.Errorf("cannot reload: working directory not set") } + s.reloadMu.Lock() + defer s.reloadMu.Unlock() + return s.reloadFromDiskLocked(ctx) +} - // Disable auto-reload during reload to prevent nested/re-entrant calls. - s.autoReloadDisabled = true - s.reloadInProgress = true - defer func() { - s.autoReloadDisabled = false - s.reloadInProgress = false - }() - +// reloadFromDiskLocked performs the actual reload. Caller must hold reloadMu. +func (s *ConfigStore) reloadFromDiskLocked(ctx context.Context) error { // Migrate deprecated disable_notifications before reloading config. migrateDisableNotifications() @@ -835,11 +837,22 @@ func (s *ConfigStore) ReloadFromDisk(ctx context.Context) error { // disabled during load/reload flows, or when working directory is not set // (e.g., during testing). Only actual reload failures return an error. func (s *ConfigStore) autoReload(ctx context.Context) error { - if s.autoReloadDisabled { - return nil // Expected skip: already in load/reload flow - } if s.workingDir == "" { return nil // Expected skip: working directory not set } - return s.ReloadFromDisk(ctx) + // Skip if a reload is already in progress. This handles both + // concurrent auto-reloads after parallel writes and re-entrant + // calls from configureProviders during a reload. + // + // Note: if a write completes after the in-progress reload has + // already read the config file, that write won't be reflected in + // memory until the next reload. This is acceptable because writes + // are rare and the next user action or file-watch tick will pick + // up the change. Callers that need guaranteed fresh state after a + // write should call ReloadFromDisk explicitly. + if !s.reloadMu.TryLock() { + return nil + } + defer s.reloadMu.Unlock() + return s.reloadFromDiskLocked(ctx) } diff --git a/internal/config/store_test.go b/internal/config/store_test.go index 5094f668896723d7eddf3907e400f1250b8f46ec..1e885a9dabb37b050849d78d0eb11f560c077264 100644 --- a/internal/config/store_test.go +++ b/internal/config/store_test.go @@ -497,27 +497,21 @@ func TestAutoReloadDisabledDuringReload(t *testing.T) { }` require.NoError(t, os.WriteFile(configPath, []byte(initialConfig), 0o600)) - // Load will trigger configureProviders which removes anthropic OAuth config - // This should NOT cause infinite recursion thanks to autoReloadDisabled guard + // Load will trigger configureProviders which removes anthropic OAuth config. + // This should NOT cause infinite recursion — reloadMu prevents re-entrant reloads. store, err := Load(dir, dir, false) require.NoError(t, err) - // Verify the store loaded successfully and autoReloadDisabled was unset - require.False(t, store.autoReloadDisabled) - // Capture snapshot and verify reload also works without recursion store.globalDataPath = configPath store.CaptureStalenessSnapshot([]string{configPath}) - // Modify file and reload - this should work without re-entrancy issues + // Modify file and reload — this should work without re-entrancy issues time.Sleep(10 * time.Millisecond) require.NoError(t, os.WriteFile(configPath, []byte(`{"options": {"debug": true}}`), 0o600)) err = store.ReloadFromDisk(context.Background()) require.NoError(t, err) - - // Verify reload completed successfully - require.False(t, store.autoReloadDisabled, "autoReloadDisabled should be false after ReloadFromDisk") } // TestSetConfigFields_AutoReloadsAtomically verifies that SetConfigFields writes