fix: skip image attachments in history when model doesn't support them (#2818)

Andrey Nering created

When switching from a vision-capable model to one without image support
mid-conversation, historical user messages containing image attachments
would cause API errors. Now those attachments are filtered out from the
prompt history based on the current model's SupportsImages capability.

💘 Generated with Crush

Assisted-by: Kimi K2.6 via Crush <crush@charm.land>

Change summary

internal/agent/agent.go      | 30 ++++++++++++++++++++---
internal/agent/agent_test.go | 49 ++++++++++++++++++++++++++++++++++++-
2 files changed, 73 insertions(+), 6 deletions(-)

Detailed changes

internal/agent/agent.go 🔗

@@ -245,7 +245,7 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 	defer cancel()
 	defer a.activeRequests.Del(call.SessionID)
 
-	history, files := a.preparePrompt(msgs, call.Attachments...)
+	history, files := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages, call.Attachments...)
 
 	startTime := time.Now()
 	a.eventPromptSent(call.SessionID)
@@ -643,7 +643,7 @@ func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fan
 		return nil
 	}
 
-	aiMsgs, _ := a.preparePrompt(msgs)
+	aiMsgs, _ := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages)
 
 	genCtx, cancel := context.WithCancel(ctx)
 	a.activeRequests.Set(sessionID, cancel)
@@ -786,7 +786,7 @@ func (a *sessionAgent) createUserMessage(ctx context.Context, call SessionAgentC
 	return msg, nil
 }
 
-func (a *sessionAgent) preparePrompt(msgs []message.Message, attachments ...message.Attachment) ([]fantasy.Message, []fantasy.FilePart) {
+func (a *sessionAgent) preparePrompt(msgs []message.Message, supportsImages bool, attachments ...message.Attachment) ([]fantasy.Message, []fantasy.FilePart) {
 	var history []fantasy.Message
 	if !a.isSubAgent {
 		history = append(history, fantasy.NewUserMessage(
@@ -830,7 +830,15 @@ If not, please feel free to ignore. Again do not mention this message to the use
 			}
 			continue
 		}
-		history = append(history, m.ToAIMessage()...)
+		aiMsgs := m.ToAIMessage()
+		if !supportsImages {
+			for i := range aiMsgs {
+				if aiMsgs[i].Role == fantasy.MessageRoleUser {
+					aiMsgs[i].Content = filterFileParts(aiMsgs[i].Content)
+				}
+			}
+		}
+		history = append(history, aiMsgs...)
 
 		if m.Role == message.Assistant {
 			if msg, ok := syntheticToolResultsForOrphanedCalls(m, knownToolResultIDs); ok {
@@ -854,6 +862,20 @@ If not, please feel free to ignore. Again do not mention this message to the use
 	return history, files
 }
 
+// filterFileParts removes fantasy.FilePart entries from a slice of message
+// parts. Used to strip image attachments from historical user messages when
+// the current model does not support them.
+func filterFileParts(parts []fantasy.MessagePart) []fantasy.MessagePart {
+	filtered := make([]fantasy.MessagePart, 0, len(parts))
+	for _, part := range parts {
+		if _, ok := fantasy.AsMessagePart[fantasy.FilePart](part); ok {
+			continue
+		}
+		filtered = append(filtered, part)
+	}
+	return filtered
+}
+
 // filterOrphanedToolResults converts a tool message to a fantasy.Message,
 // dropping any tool result parts whose tool_call_id has no matching tool call
 // in the known set. An orphaned result causes API validation to fail on every

internal/agent/agent_test.go 🔗

@@ -656,6 +656,51 @@ func BenchmarkBuildSummaryPrompt(b *testing.B) {
 	}
 }
 
+func TestPreparePrompt_FiltersImageAttachments(t *testing.T) {
+	env := testEnv(t)
+	sa := testSessionAgent(env, nil, nil, "test prompt")
+	agent := sa.(*sessionAgent)
+
+	ctx := t.Context()
+	sess, err := env.sessions.Create(ctx, "test")
+	require.NoError(t, err)
+
+	// User message with text, a text attachment, and an image attachment.
+	_, err = env.messages.Create(ctx, sess.ID, message.CreateMessageParams{
+		Role: message.User,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "hello world"},
+			message.BinaryContent{Path: "notes.txt", MIMEType: "text/plain", Data: []byte("important notes")},
+			message.BinaryContent{Path: "image.png", MIMEType: "image/png", Data: []byte("fake-image-data")},
+		},
+	})
+	require.NoError(t, err)
+
+	msgs, err := env.messages.List(ctx, sess.ID)
+	require.NoError(t, err)
+
+	// When supportsImages is false, image attachments should be stripped.
+	history, _ := agent.preparePrompt(msgs, false)
+	// First message is the system reminder, second is the user message.
+	require.Len(t, history, 2)
+	require.Len(t, history[1].Content, 1)
+	text, ok := fantasy.AsMessagePart[fantasy.TextPart](history[1].Content[0])
+	require.True(t, ok)
+	require.Contains(t, text.Text, "hello world")
+	require.Contains(t, text.Text, "important notes")
+
+	// When supportsImages is true, image attachments should remain.
+	history, _ = agent.preparePrompt(msgs, true)
+	require.Len(t, history, 2)
+	require.Len(t, history[1].Content, 2)
+	text, ok = fantasy.AsMessagePart[fantasy.TextPart](history[1].Content[0])
+	require.True(t, ok)
+	require.Contains(t, text.Text, "hello world")
+	file, ok := fantasy.AsMessagePart[fantasy.FilePart](history[1].Content[1])
+	require.True(t, ok)
+	require.Equal(t, "image.png", file.Filename)
+}
+
 func TestPreparePrompt_OrphanedToolUse(t *testing.T) {
 	env := testEnv(t)
 	sa := testSessionAgent(env, nil, nil, "test prompt")
@@ -702,7 +747,7 @@ func TestPreparePrompt_OrphanedToolUse(t *testing.T) {
 	msgs, err := env.messages.List(ctx, sess.ID)
 	require.NoError(t, err)
 
-	history, _ := agent.preparePrompt(msgs)
+	history, _ := agent.preparePrompt(msgs, true)
 
 	// The history must contain a synthetic tool result for the orphaned call.
 	found := false
@@ -776,7 +821,7 @@ func TestPreparePrompt_OrphanedToolUseMixed(t *testing.T) {
 	msgs, err := env.messages.List(ctx, sess.ID)
 	require.NoError(t, err)
 
-	history, _ := agent.preparePrompt(msgs)
+	history, _ := agent.preparePrompt(msgs, true)
 
 	// Should have a synthetic result only for the orphaned call.
 	var syntheticCount int