1package openai
2
3import (
4 "encoding/base64"
5 "testing"
6
7 "charm.land/fantasy"
8 "github.com/stretchr/testify/require"
9)
10
11// Tool messages in the OpenAI Chat Completions API cannot carry image or audio
12// content directly — the SDK's content union only accepts text. When a tool
13// returns media, DefaultToPrompt must still emit a text tool message so the
14// tool_call/tool_result pairing stays valid, and attach the media to a
15// synthetic follow-up user message so vision- and audio-capable models can see
16// it.
17
18func TestDefaultToPrompt_MediaToolResult_ImagePNG(t *testing.T) {
19 t.Parallel()
20
21 imageData := base64.StdEncoding.EncodeToString([]byte{0, 1, 2, 3})
22 prompt := fantasy.Prompt{
23 {
24 Role: fantasy.MessageRoleAssistant,
25 Content: []fantasy.MessagePart{
26 fantasy.ToolCallPart{ToolCallID: "img-1", ToolName: "view", Input: "{}"},
27 },
28 },
29 {
30 Role: fantasy.MessageRoleTool,
31 Content: []fantasy.MessagePart{
32 fantasy.ToolResultPart{
33 ToolCallID: "img-1",
34 Output: fantasy.ToolResultOutputContentMedia{
35 Data: imageData,
36 MediaType: "image/png",
37 },
38 },
39 },
40 },
41 }
42
43 messages, warnings := DefaultToPrompt(prompt, "openrouter", "anthropic/claude-opus-4.7")
44
45 require.Empty(t, warnings)
46 // Assistant tool call + text tool message + synthetic user image message.
47 require.Len(t, messages, 3)
48
49 toolMsg := messages[1].OfTool
50 require.NotNil(t, toolMsg)
51 require.Equal(t, "img-1", toolMsg.ToolCallID)
52 require.Contains(t, toolMsg.Content.OfString.Value, "image/png")
53
54 userMsg := messages[2].OfUser
55 require.NotNil(t, userMsg)
56 require.Len(t, userMsg.Content.OfArrayOfContentParts, 1)
57 imagePart := userMsg.Content.OfArrayOfContentParts[0].OfImageURL
58 require.NotNil(t, imagePart)
59 require.Equal(t, "data:image/png;base64,"+imageData, imagePart.ImageURL.URL)
60}
61
62func TestDefaultToPrompt_MediaToolResult_PrefersAccompanyingText(t *testing.T) {
63 t.Parallel()
64
65 imageData := base64.StdEncoding.EncodeToString([]byte{9, 9, 9})
66 prompt := fantasy.Prompt{
67 {
68 Role: fantasy.MessageRoleAssistant,
69 Content: []fantasy.MessagePart{
70 fantasy.ToolCallPart{ToolCallID: "img-2", ToolName: "view", Input: "{}"},
71 },
72 },
73 {
74 Role: fantasy.MessageRoleTool,
75 Content: []fantasy.MessagePart{
76 fantasy.ToolResultPart{
77 ToolCallID: "img-2",
78 Output: fantasy.ToolResultOutputContentMedia{
79 Data: imageData,
80 MediaType: "image/jpeg",
81 Text: "Screenshot of the blockquote element.",
82 },
83 },
84 },
85 },
86 }
87
88 messages, warnings := DefaultToPrompt(prompt, "openrouter", "anthropic/claude-opus-4.7")
89
90 require.Empty(t, warnings)
91 require.Len(t, messages, 3)
92 require.Equal(t, "Screenshot of the blockquote element.", messages[1].OfTool.Content.OfString.Value)
93}
94
95func TestDefaultToPrompt_MediaToolResult_AudioWAV(t *testing.T) {
96 t.Parallel()
97
98 audio := base64.StdEncoding.EncodeToString([]byte("fake-wav-bytes"))
99 prompt := fantasy.Prompt{
100 {
101 Role: fantasy.MessageRoleAssistant,
102 Content: []fantasy.MessagePart{
103 fantasy.ToolCallPart{ToolCallID: "audio-1", ToolName: "record", Input: "{}"},
104 },
105 },
106 {
107 Role: fantasy.MessageRoleTool,
108 Content: []fantasy.MessagePart{
109 fantasy.ToolResultPart{
110 ToolCallID: "audio-1",
111 Output: fantasy.ToolResultOutputContentMedia{
112 Data: audio,
113 MediaType: "audio/wav",
114 },
115 },
116 },
117 },
118 }
119
120 messages, warnings := DefaultToPrompt(prompt, "openai", "gpt-4o-audio")
121
122 require.Empty(t, warnings)
123 require.Len(t, messages, 3)
124 require.NotNil(t, messages[1].OfTool)
125 userMsg := messages[2].OfUser
126 require.NotNil(t, userMsg)
127 require.Len(t, userMsg.Content.OfArrayOfContentParts, 1)
128 audioPart := userMsg.Content.OfArrayOfContentParts[0].OfInputAudio
129 require.NotNil(t, audioPart)
130 require.Equal(t, audio, audioPart.InputAudio.Data)
131 require.Equal(t, "wav", audioPart.InputAudio.Format)
132}
133
134func TestDefaultToPrompt_MediaToolResult_UnsupportedMediaType(t *testing.T) {
135 t.Parallel()
136
137 prompt := fantasy.Prompt{
138 {
139 Role: fantasy.MessageRoleAssistant,
140 Content: []fantasy.MessagePart{
141 fantasy.ToolCallPart{ToolCallID: "vid-1", ToolName: "record", Input: "{}"},
142 },
143 },
144 {
145 Role: fantasy.MessageRoleTool,
146 Content: []fantasy.MessagePart{
147 fantasy.ToolResultPart{
148 ToolCallID: "vid-1",
149 Output: fantasy.ToolResultOutputContentMedia{
150 Data: "AAAA",
151 MediaType: "video/mp4",
152 },
153 },
154 },
155 },
156 }
157
158 messages, warnings := DefaultToPrompt(prompt, "openai", "gpt-5")
159
160 // Assistant tool call + text tool message, but no synthetic user image.
161 require.Len(t, messages, 2)
162 require.NotNil(t, messages[1].OfTool)
163 require.Equal(t, "vid-1", messages[1].OfTool.ToolCallID)
164 require.Len(t, warnings, 1)
165 require.Contains(t, warnings[0].Message, "video/mp4")
166}
167
168func TestToResponsesPrompt_MediaToolResult_ImagePNG(t *testing.T) {
169 t.Parallel()
170
171 imageData := base64.StdEncoding.EncodeToString([]byte{7, 7, 7, 7})
172 prompt := fantasy.Prompt{
173 {
174 Role: fantasy.MessageRoleAssistant,
175 Content: []fantasy.MessagePart{
176 fantasy.ToolCallPart{ToolCallID: "img-resp-1", ToolName: "view", Input: "{}"},
177 },
178 },
179 {
180 Role: fantasy.MessageRoleTool,
181 Content: []fantasy.MessagePart{
182 fantasy.ToolResultPart{
183 ToolCallID: "img-resp-1",
184 Output: fantasy.ToolResultOutputContentMedia{
185 Data: imageData,
186 MediaType: "image/png",
187 },
188 },
189 },
190 },
191 }
192
193 input, warnings := toResponsesPrompt(prompt, "system", false)
194
195 require.Empty(t, warnings)
196 // Assistant function call + function_call_output + synthetic user image
197 // message.
198 require.Len(t, input, 3)
199
200 funcOut := input[1].OfFunctionCallOutput
201 require.NotNil(t, funcOut)
202 require.Equal(t, "img-resp-1", funcOut.CallID)
203 require.Contains(t, funcOut.Output.OfString.Value, "image/png")
204
205 userMsg := input[2].OfMessage
206 require.NotNil(t, userMsg)
207 parts := userMsg.Content.OfInputItemContentList
208 require.Len(t, parts, 1)
209 imagePart := parts[0].OfInputImage
210 require.NotNil(t, imagePart)
211 require.Equal(t, "data:image/png;base64,"+imageData, imagePart.ImageURL.Value)
212}
213
214func TestToResponsesPrompt_MediaToolResult_UnsupportedMediaType(t *testing.T) {
215 t.Parallel()
216
217 prompt := fantasy.Prompt{
218 {
219 Role: fantasy.MessageRoleAssistant,
220 Content: []fantasy.MessagePart{
221 fantasy.ToolCallPart{ToolCallID: "vid-resp-1", ToolName: "record", Input: "{}"},
222 },
223 },
224 {
225 Role: fantasy.MessageRoleTool,
226 Content: []fantasy.MessagePart{
227 fantasy.ToolResultPart{
228 ToolCallID: "vid-resp-1",
229 Output: fantasy.ToolResultOutputContentMedia{
230 Data: "AAAA",
231 MediaType: "video/mp4",
232 },
233 },
234 },
235 },
236 }
237
238 input, warnings := toResponsesPrompt(prompt, "system", false)
239
240 // Assistant function call + function_call_output, but no synthetic user
241 // image message.
242 require.Len(t, input, 2)
243 require.NotNil(t, input[1].OfFunctionCallOutput)
244 require.Equal(t, "vid-resp-1", input[1].OfFunctionCallOutput.CallID)
245 require.Len(t, warnings, 1)
246 require.Contains(t, warnings[0].Message, "video/mp4")
247}