config: fail provider header expansion loudly and drop empty values

Christian Rocha and Charm Crush created

Provider extra_headers used to log and keep the raw template when a
command inside the header failed. A failing command now stops the
provider from loading, with a clear error that names the provider
and the header.

Headers whose value ends up as an empty string are dropped from the
outgoing request, so "OpenAI-Organization": "$OPENAI_ORG_ID" keeps
working when the variable is not set. MCP headers follow the same
rule.

Co-Authored-By: Charm Crush <crush@charm.land>

Change summary

internal/config/config.go            |  10 +
internal/config/load.go              |  25 +++-
internal/config/load_test.go         | 152 +++++++++++++++++++++++------
internal/config/resolve_real_test.go |  65 ++++++++++++
4 files changed, 210 insertions(+), 42 deletions(-)

Detailed changes

internal/config/config.go 🔗

@@ -368,6 +368,13 @@ func (m MCPConfig) ResolvedURL(r VariableResolver) (string, error) {
 // already sanitized by ResolveValue and is wrapped with %w so
 // errors.Is/As continues to work.
 //
+// A header whose value resolves to the empty string (unset bare $VAR
+// under lenient nounset, $(echo), or literal "") is omitted from the
+// returned map — sending "X-Auth:" with an empty value is rejected by
+// some providers and the user's intent in "optional, env-gated
+// header" is clearly "absent when the var isn't set." See PLAN.md
+// Phase 2 design decision #18.
+//
 // See ResolvedEnv for guidance on picking a resolver.
 func (m MCPConfig) ResolvedHeaders(r VariableResolver) (map[string]string, error) {
 	if len(m.Headers) == 0 {
@@ -386,6 +393,9 @@ func (m MCPConfig) ResolvedHeaders(r VariableResolver) (map[string]string, error
 		if err != nil {
 			return nil, fmt.Errorf("header %s: %w", k, err)
 		}
+		if v == "" {
+			continue
+		}
 		out[k] = v
 	}
 	return out, nil

internal/config/load.go 🔗

@@ -222,16 +222,19 @@ func (c *Config) configureProviders(store *ConfigStore, env env.Env, resolver Va
 		if len(config.ExtraHeaders) > 0 {
 			maps.Copy(headers, config.ExtraHeaders)
 		}
-		// Intentional divergence from MCP env/headers/args/url resolution:
-		// provider headers that fail to resolve log and keep their literal
-		// template so providers with optional, env-gated headers still
-		// load on hosts where those vars are unset. Changing this to a
-		// hard error would break those configs. See PLAN.md "Design
-		// decisions" item 4 for the full rationale.
+		// Provider headers use the same error contract as MCP headers:
+		// a failing $(...) aborts the provider load with a clear
+		// message, and a header that resolves to the empty string
+		// (unset bare $VAR under lenient nounset, $(echo), or literal
+		// "") is dropped from the outgoing request. See PLAN.md
+		// Phase 2 design decisions #14 and #18.
 		for k, v := range headers {
 			resolved, err := resolver.ResolveValue(v)
 			if err != nil {
-				slog.Error("Could not resolve provider header", "err", err.Error())
+				return fmt.Errorf("resolving provider %s header %q: %w", p.ID, k, err)
+			}
+			if resolved == "" {
+				delete(headers, k)
 				continue
 			}
 			headers[k] = resolved
@@ -382,10 +385,16 @@ func (c *Config) configureProviders(store *ConfigStore, env env.Env, resolver Va
 			continue
 		}
 
+		// Custom-provider headers share the MCP error contract; see
+		// the known-provider loop above and PLAN.md Phase 2 design
+		// decisions #14 and #18.
 		for k, v := range providerConfig.ExtraHeaders {
 			resolved, err := resolver.ResolveValue(v)
 			if err != nil {
-				slog.Error("Could not resolve provider header", "err", err.Error())
+				return fmt.Errorf("resolving provider %s header %q: %w", id, k, err)
+			}
+			if resolved == "" {
+				delete(providerConfig.ExtraHeaders, k)
 				continue
 			}
 			providerConfig.ExtraHeaders[k] = resolved

internal/config/load_test.go 🔗

@@ -1590,18 +1590,13 @@ func TestConfig_configureProviders_HyperAPIKeyFromConfigOverrides(t *testing.T)
 	require.Equal(t, "env-api-key", pc.APIKey)
 }
 
-// TestConfig_configureProviders_ProviderHeaderResolveFailure pins the
-// intentional divergence at load.go:225: a provider header whose
-// expansion fails must NOT fail the whole config load. It logs an
-// error, keeps the literal template in the resolved header map, and
-// moves on. The MCP contract (hard error on failed expansion) does not
-// apply here because many providers ship DefaultHeaders that reference
-// env vars which are legitimately unset on most hosts.
-//
-// If this test ever flips to requiring an error, read PLAN.md "Design
-// decisions" item 4 before changing the production code — the
-// divergence is deliberate.
-func TestConfig_configureProviders_ProviderHeaderResolveFailure(t *testing.T) {
+// TestConfig_configureProviders_ProviderHeaderResolveError pins
+// Phase 2 design decision #14: a failing $(cmd) in a provider header
+// must fail the provider load with a clear message that names the
+// offending header. The Phase 1 log-and-continue divergence at
+// load.go:225 is gone; provider headers now share the MCP error
+// contract.
+func TestConfig_configureProviders_ProviderHeaderResolveError(t *testing.T) {
 	knownProviders := []catwalk.Provider{
 		{
 			ID:          "openai",
@@ -1615,17 +1610,9 @@ func TestConfig_configureProviders_ProviderHeaderResolveFailure(t *testing.T) {
 		Providers: csync.NewMapFrom(map[string]ProviderConfig{
 			"openai": {
 				ExtraHeaders: map[string]string{
-					// Failing $(...) — inner command exits 1, no stdout.
+					// Failing $(...) — inner command exits 1. Must
+					// propagate as an error, not a silent truncation.
 					"X-Broken": "$(false)",
-					// Unset var — under Phase 2 lenient nounset this
-					// resolves cleanly to "" rather than erroring;
-					// Step 10 will rewrite this whole test once the
-					// empty-drop rule replaces the log-and-continue
-					// loop at load.go:225.
-					"X-Missing": "$PROVIDER_HEADER_NEVER_SET",
-					// Happy path: must still be resolved, proving the
-					// failure in X-Broken did not abort the loop.
-					"X-Static": "kept-literal",
 				},
 			},
 		}),
@@ -1639,20 +1626,117 @@ func TestConfig_configureProviders_ProviderHeaderResolveFailure(t *testing.T) {
 	resolver := NewShellVariableResolver(testEnv)
 
 	err := cfg.configureProviders(testStore(cfg), testEnv, resolver, knownProviders)
-	require.NoError(t, err, "provider load must tolerate failing header expansion")
+	require.Error(t, err, "failing $(cmd) in a header must fail the provider load")
+	require.Contains(t, err.Error(), "X-Broken", "error must name the offending header")
+}
+
+// TestConfig_configureProviders_CatwalkDefaultWithUnsetVarLoads pins
+// Phase 2 design decisions #11 and #18 from the provider angle: a
+// Catwalk-style default header like
+// "OpenAI-Organization": "$OPENAI_ORG_ID" must load cleanly under
+// lenient nounset (unset → "" → header dropped), not fail the load
+// and not leave the literal template on the wire.
+func TestConfig_configureProviders_CatwalkDefaultWithUnsetVarLoads(t *testing.T) {
+	knownProviders := []catwalk.Provider{
+		{
+			ID:          "openai",
+			APIKey:      "$OPENAI_API_KEY",
+			APIEndpoint: "https://api.openai.com/v1",
+			Models:      []catwalk.Model{{ID: "test-model"}},
+			DefaultHeaders: map[string]string{
+				"OpenAI-Organization": "$OPENAI_ORG_ID",
+			},
+		},
+	}
+
+	cfg := &Config{}
+	cfg.setDefaults("/tmp", "")
+
+	testEnv := env.NewFromMap(map[string]string{
+		"OPENAI_API_KEY": "test-key",
+		"PATH":           os.Getenv("PATH"),
+	})
+	resolver := NewShellVariableResolver(testEnv)
+
+	err := cfg.configureProviders(testStore(cfg), testEnv, resolver, knownProviders)
+	require.NoError(t, err, "optional env-gated header must not fail the load")
 
 	pc, ok := cfg.Providers.Get("openai")
 	require.True(t, ok, "openai provider must still be configured")
+	_, present := pc.ExtraHeaders["OpenAI-Organization"]
+	require.False(t, present, "header whose value resolves to empty must be absent")
+}
+
+// TestConfig_configureProviders_LiteralEmptyHeaderDropped pins design
+// decision #18 for the literal case: a user-authored
+// "X-Custom": "" in extra_headers is absent from the resolved map.
+// Applies to both known- and custom-provider paths; this test
+// exercises the custom-provider loop.
+func TestConfig_configureProviders_LiteralEmptyHeaderDropped(t *testing.T) {
+	cfg := &Config{
+		Providers: csync.NewMapFrom(map[string]ProviderConfig{
+			"my-llm": {
+				APIKey:  "test-key",
+				BaseURL: "https://my-llm.example.com/v1",
+				Type:    catwalk.TypeOpenAI,
+				Models:  []catwalk.Model{{ID: "m"}},
+				ExtraHeaders: map[string]string{
+					"X-Custom": "",
+					"X-Kept":   "present",
+				},
+			},
+		}),
+	}
+	cfg.setDefaults("/tmp", "")
 
-	// X-Broken still errors on $(false) so its literal template is
-	// preserved by the log-and-continue loop. X-Missing resolves
-	// cleanly to "" under lenient nounset: the current divergence
-	// only kicks in on resolver errors, so a non-erroring empty
-	// expansion writes through. The happy-path header is resolved
-	// through the shell (pass-through for a literal value). Step 10
-	// will flip this whole surface to the MCP contract plus the
-	// empty-drop rule from design decision #18.
-	require.Equal(t, "$(false)", pc.ExtraHeaders["X-Broken"])
-	require.Equal(t, "", pc.ExtraHeaders["X-Missing"])
-	require.Equal(t, "kept-literal", pc.ExtraHeaders["X-Static"])
+	testEnv := env.NewFromMap(map[string]string{
+		"PATH": os.Getenv("PATH"),
+	})
+	resolver := NewShellVariableResolver(testEnv)
+
+	err := cfg.configureProviders(testStore(cfg), testEnv, resolver, []catwalk.Provider{})
+	require.NoError(t, err)
+
+	pc, ok := cfg.Providers.Get("my-llm")
+	require.True(t, ok)
+	_, present := pc.ExtraHeaders["X-Custom"]
+	require.False(t, present, "literal empty-string header must be dropped")
+	require.Equal(t, "present", pc.ExtraHeaders["X-Kept"])
+}
+
+// TestConfig_configureProviders_EchoEmptyHeaderDropped pins design
+// decision #18 for the non-failing empty case: $(echo) exits 0 with
+// empty output, resolves cleanly to "", and must be dropped the same
+// way an unset bare $VAR is. Exercises the known-provider loop.
+func TestConfig_configureProviders_EchoEmptyHeaderDropped(t *testing.T) {
+	knownProviders := []catwalk.Provider{
+		{
+			ID:          "openai",
+			APIKey:      "$OPENAI_API_KEY",
+			APIEndpoint: "https://api.openai.com/v1",
+			Models:      []catwalk.Model{{ID: "test-model"}},
+			DefaultHeaders: map[string]string{
+				"X-Empty": "$(echo)",
+				"X-Kept":  "present",
+			},
+		},
+	}
+
+	cfg := &Config{}
+	cfg.setDefaults("/tmp", "")
+
+	testEnv := env.NewFromMap(map[string]string{
+		"OPENAI_API_KEY": "test-key",
+		"PATH":           os.Getenv("PATH"),
+	})
+	resolver := NewShellVariableResolver(testEnv)
+
+	err := cfg.configureProviders(testStore(cfg), testEnv, resolver, knownProviders)
+	require.NoError(t, err)
+
+	pc, ok := cfg.Providers.Get("openai")
+	require.True(t, ok)
+	_, present := pc.ExtraHeaders["X-Empty"]
+	require.False(t, present, "$(echo) → empty → header must be dropped")
+	require.Equal(t, "present", pc.ExtraHeaders["X-Kept"])
 }

internal/config/resolve_real_test.go 🔗

@@ -259,6 +259,71 @@ func TestResolvedHeaders_RealShell_FailurePreservesOriginal(t *testing.T) {
 	require.Equal(t, orig, m.Headers, "receiver Headers must be preserved")
 }
 
+// TestResolvedHeaders_RealShell_DropEmpty pins Phase 2 design
+// decision #18 on the MCP side: a header whose value resolves to the
+// empty string is omitted from the returned map. Covers the three
+// ways a value can legitimately land on empty — unset bare $VAR
+// under lenient nounset, a literal "", and a non-failing command
+// whose stdout is empty — and also pins that a failing $(cmd) still
+// errors rather than silently dropping.
+func TestResolvedHeaders_RealShell_DropEmpty(t *testing.T) {
+	t.Parallel()
+
+	t.Run("unset $VAR is absent", func(t *testing.T) {
+		t.Parallel()
+		m := MCPConfig{Headers: map[string]string{
+			"X-Missing": "$MCP_HEADER_NEVER_SET",
+			"X-Kept":    "present",
+		}}
+		got, err := m.ResolvedHeaders(realShellResolver(nil))
+		require.NoError(t, err)
+		_, present := got["X-Missing"]
+		require.False(t, present, "unset bare $VAR → empty → header dropped")
+		require.Equal(t, "present", got["X-Kept"])
+	})
+
+	t.Run("literal empty string is absent", func(t *testing.T) {
+		t.Parallel()
+		m := MCPConfig{Headers: map[string]string{
+			"X-Custom": "",
+			"X-Kept":   "present",
+		}}
+		got, err := m.ResolvedHeaders(realShellResolver(nil))
+		require.NoError(t, err)
+		_, present := got["X-Custom"]
+		require.False(t, present, "literal empty-string header must be dropped")
+		require.Equal(t, "present", got["X-Kept"])
+	})
+
+	t.Run("$(echo) is absent", func(t *testing.T) {
+		t.Parallel()
+		m := MCPConfig{Headers: map[string]string{
+			"X-Empty": "$(echo)",
+			"X-Kept":  "present",
+		}}
+		got, err := m.ResolvedHeaders(realShellResolver(nil))
+		require.NoError(t, err)
+		_, present := got["X-Empty"]
+		require.False(t, present, "$(echo) → empty → header dropped")
+		require.Equal(t, "present", got["X-Kept"])
+	})
+
+	t.Run("$(false) errors and does not mutate", func(t *testing.T) {
+		t.Parallel()
+		m := MCPConfig{Headers: map[string]string{
+			"X-Broken": "$(false)",
+			"X-Kept":   "present",
+		}}
+		orig := maps.Clone(m.Headers)
+
+		got, err := m.ResolvedHeaders(realShellResolver(nil))
+		require.Error(t, err)
+		require.Empty(t, got, "map must be nil/empty on failure, not a partial")
+		require.Contains(t, err.Error(), "header X-Broken")
+		require.Equal(t, orig, m.Headers, "receiver Headers must be preserved")
+	})
+}
+
 // TestResolvedArgs_RealShell exercises both success and failure for
 // m.Args symmetrically with Env. Args are ordered so error messages
 // must identify a positional index, not a key.