// sniffImageMimeType chooses the MIME type to declare for image bytes.
//
// It content-sniffs data with http.DetectContentType and returns the sniffed
// type when that is one of the supported image formats: image/jpeg,
// image/png, image/gif or image/webp. For any other result (plain text,
// unsupported image formats, empty input) it returns fallback unchanged;
// fallback is normally the type derived from the file extension.
//
// Some tools save files with a mismatched extension (e.g. JPEG bytes in a
// .png file). Providers that validate the declared media type against the
// payload's magic bytes (e.g. Anthropic) reject such requests with a 400, so
// the filename alone cannot be trusted.
func sniffImageMimeType(data []byte, fallback string) string {
	// DetectContentType appends a parameter to textual results (for example
	// "text/plain; charset=utf-8"). Its image signatures currently yield bare
	// types, but keep only the media type in case that ever changes.
	sniffed, _, hasParam := strings.Cut(http.DetectContentType(data), ";")
	if hasParam {
		sniffed = strings.TrimSpace(sniffed)
	}

	switch sniffed {
	case "image/jpeg", "image/png", "image/gif", "image/webp":
		return sniffed
	default:
		// Not a supported image signature: trust the caller's type.
		return fallback
	}
}