fix(tools/view): detect image mime type; don't rely on extension (#2757)

Christian Rocha created

Change summary

internal/agent/tools/view.go      | 29 +++++++++++++++++++++++++++++
internal/agent/tools/view_test.go | 32 ++++++++++++++++++++++++++++++++
2 files changed, 61 insertions(+)

Detailed changes

internal/agent/tools/view.go 🔗

@@ -7,6 +7,7 @@ import (
 	"fmt"
 	"io"
 	"io/fs"
+	"net/http"
 	"os"
 	"path/filepath"
 	"strings"
@@ -188,6 +189,14 @@ func NewViewTool(
 					return fantasy.ToolResponse{}, fmt.Errorf("error reading image file: %w", readErr)
 				}
 
+				// Some tools save files with a mismatched extension
+				// (e.g. pinchtab writes JPEG bytes to a .png file).
+				// Providers like Anthropic strictly validate the
+				// media type against the base64 magic bytes and 400
+				// on mismatch, so prefer the sniffed type whenever
+				// it identifies a supported image format.
+				mimeType = sniffImageMimeType(imageData, mimeType)
+
 				return fantasy.NewImageResponse(imageData, mimeType), nil
 			}
 
@@ -313,6 +322,26 @@ func getImageMimeType(filePath string) (bool, string) {
 	}
 }
 
+// sniffImageMimeType returns the content-sniffed MIME type when it identifies
+// a supported image format. Otherwise it returns the provided fallback, which
+// is usually the extension-derived type. Providers that validate the declared
+// media type against the image's magic bytes (e.g. Anthropic) reject
+// mismatched requests with a 400, so trusting the filename alone is unsafe.
+func sniffImageMimeType(data []byte, fallback string) string {
+	sniffed := http.DetectContentType(data)
+	// http.DetectContentType may return the MIME with a ";" parameter
+	// (e.g. "text/plain; charset=utf-8") although current image sniffers
+	// return bare types; strip defensively.
+	if i := strings.IndexByte(sniffed, ';'); i >= 0 {
+		sniffed = strings.TrimSpace(sniffed[:i])
+	}
+	switch sniffed {
+	case "image/jpeg", "image/png", "image/gif", "image/webp":
+		return sniffed
+	}
+	return fallback
+}
+
 type LineScanner struct {
 	scanner *bufio.Scanner
 }

internal/agent/tools/view_test.go 🔗

@@ -137,3 +137,35 @@ func TestReadBuiltinFile(t *testing.T) {
 		require.NotContains(t, resp.Content, "     1|")
 	})
 }
+
+func TestSniffImageMimeType(t *testing.T) {
+	t.Parallel()
+
+	jpegMagic := []byte{0xff, 0xd8, 0xff, 0xe0, 0x00, 0x10, 'J', 'F', 'I', 'F'}
+	pngMagic := []byte{0x89, 'P', 'N', 'G', 0x0d, 0x0a, 0x1a, 0x0a}
+	gifMagic := []byte("GIF89a")
+	// Minimal RIFF/WEBP header: "RIFF", four zeroed bytes, "WEBPVP8 ", then 16 zero bytes of padding.
+	webpMagic := append([]byte("RIFF\x00\x00\x00\x00WEBPVP8 "), make([]byte, 16)...)
+	random := []byte("not an image at all, just text")
+
+	cases := []struct {
+		name     string
+		data     []byte
+		fallback string
+		want     string
+	}{
+		{"jpeg bytes in .png file uses sniffed", jpegMagic, "image/png", "image/jpeg"},
+		{"png bytes in .jpg file uses sniffed", pngMagic, "image/jpeg", "image/png"},
+		{"gif bytes uses sniffed", gifMagic, "image/png", "image/gif"},
+		{"webp bytes uses sniffed", webpMagic, "image/png", "image/webp"},
+		{"matching extension and content keeps sniffed", pngMagic, "image/png", "image/png"},
+		{"unsniffable content falls back", random, "image/png", "image/png"},
+		{"empty content falls back", nil, "image/jpeg", "image/jpeg"},
+	}
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+			require.Equal(t, tc.want, sniffImageMimeType(tc.data, tc.fallback))
+		})
+	}
+}