shelley: send x-session-affinity header to Fireworks for prompt caching

Philip Zeyliger and Shelley created

Prompt: In a new worktree make the fireworks client send the conversation id as the session hint to enable prompt caching.

Add the conversation ID as the x-session-affinity header when making
requests to Fireworks. This enables Fireworks' prompt caching feature,
which reduces time to first token by up to 80% and reduces costs for
cached prompt tokens.

The header is only added when:
1. A conversation ID is present in the context
2. The provider is 'fireworks'

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

llm/llmhttp/llmhttp.go      |  5 ++++
llm/llmhttp/llmhttp_test.go | 41 +++++++++++++++++++++++++++++++++++++++
2 files changed, 46 insertions(+)

Detailed changes

llm/llmhttp/llmhttp.go 🔗

@@ -88,6 +88,11 @@ func (t *Transport) RoundTrip(req *http.Request) (*http.Response, error) {
 	// Add conversation ID header if present
 	if conversationID := ConversationIDFromContext(req.Context()); conversationID != "" {
 		req.Header.Set("Shelley-Conversation-Id", conversationID)
+
+		// Add x-session-affinity header for Fireworks to enable prompt caching
+		if ProviderFromContext(req.Context()) == "fireworks" {
+			req.Header.Set("x-session-affinity", conversationID)
+		}
 	}
 
 	// Read and store the request body for recording

llm/llmhttp/llmhttp_test.go 🔗

@@ -76,6 +76,47 @@ func TestTransportAddsHeaders(t *testing.T) {
 	if got := receivedHeaders.Get("Shelley-Conversation-Id"); got != "test-conv-id" {
 		t.Errorf("Shelley-Conversation-Id = %q, want %q", got, "test-conv-id")
 	}
+
+	// Verify x-session-affinity is NOT added for non-fireworks providers
+	if got := receivedHeaders.Get("x-session-affinity"); got != "" {
+		t.Errorf("x-session-affinity = %q, want empty for non-fireworks", got)
+	}
+}
+
+func TestTransportAddsSessionAffinityForFireworks(t *testing.T) {
+	// Create a test server that echoes request headers
+	var receivedHeaders http.Header
+	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+		receivedHeaders = r.Header.Clone()
+		w.WriteHeader(http.StatusOK)
+		w.Write([]byte("ok"))
+	}))
+	defer server.Close()
+
+	// Create client with our transport
+	client := NewClient(nil, nil)
+
+	// Make a request with conversation ID and provider=fireworks in context
+	ctx := context.Background()
+	ctx = WithConversationID(ctx, "test-conv-id")
+	ctx = WithProvider(ctx, "fireworks")
+	req, _ := http.NewRequestWithContext(ctx, "GET", server.URL, nil)
+
+	resp, err := client.Do(req)
+	if err != nil {
+		t.Fatalf("Request failed: %v", err)
+	}
+	resp.Body.Close()
+
+	// Verify x-session-affinity header was added for fireworks
+	if got := receivedHeaders.Get("x-session-affinity"); got != "test-conv-id" {
+		t.Errorf("x-session-affinity = %q, want %q", got, "test-conv-id")
+	}
+
+	// Verify Shelley-Conversation-Id header was also added
+	if got := receivedHeaders.Get("Shelley-Conversation-Id"); got != "test-conv-id" {
+		t.Errorf("Shelley-Conversation-Id = %q, want %q", got, "test-conv-id")
+	}
 }
 
 func TestTransportRecordsRequest(t *testing.T) {