computer_use_test.go

  1package anthropic
  2
  3import (
  4	"encoding/base64"
  5	"errors"
  6	"testing"
  7
  8	"charm.land/fantasy"
  9	"github.com/stretchr/testify/require"
 10)
 11
 12func TestParseComputerUseInput(t *testing.T) {
 13	t.Parallel()
 14
 15	t.Run("screenshot", func(t *testing.T) {
 16		t.Parallel()
 17		input, err := ParseComputerUseInput(`{"action":"screenshot"}`)
 18		require.NoError(t, err)
 19		require.Equal(t, ActionScreenshot, input.Action)
 20		require.Equal(t, [2]int64{0, 0}, input.Coordinate)
 21		require.Equal(t, "", input.Text)
 22	})
 23
 24	t.Run("left_click with coordinate", func(t *testing.T) {
 25		t.Parallel()
 26		input, err := ParseComputerUseInput(`{"action":"left_click","coordinate":[100,200]}`)
 27		require.NoError(t, err)
 28		require.Equal(t, ActionLeftClick, input.Action)
 29		require.Equal(t, [2]int64{100, 200}, input.Coordinate)
 30	})
 31
 32	t.Run("right_click with coordinate", func(t *testing.T) {
 33		t.Parallel()
 34		input, err := ParseComputerUseInput(`{"action":"right_click","coordinate":[50,75]}`)
 35		require.NoError(t, err)
 36		require.Equal(t, ActionRightClick, input.Action)
 37		require.Equal(t, [2]int64{50, 75}, input.Coordinate)
 38	})
 39
 40	t.Run("double_click with coordinate", func(t *testing.T) {
 41		t.Parallel()
 42		input, err := ParseComputerUseInput(`{"action":"double_click","coordinate":[300,400]}`)
 43		require.NoError(t, err)
 44		require.Equal(t, ActionDoubleClick, input.Action)
 45		require.Equal(t, [2]int64{300, 400}, input.Coordinate)
 46	})
 47
 48	t.Run("middle_click with coordinate", func(t *testing.T) {
 49		t.Parallel()
 50		input, err := ParseComputerUseInput(`{"action":"middle_click","coordinate":[10,20]}`)
 51		require.NoError(t, err)
 52		require.Equal(t, ActionMiddleClick, input.Action)
 53		require.Equal(t, [2]int64{10, 20}, input.Coordinate)
 54	})
 55
 56	t.Run("mouse_move with coordinate", func(t *testing.T) {
 57		t.Parallel()
 58		input, err := ParseComputerUseInput(`{"action":"mouse_move","coordinate":[500,600]}`)
 59		require.NoError(t, err)
 60		require.Equal(t, ActionMouseMove, input.Action)
 61		require.Equal(t, [2]int64{500, 600}, input.Coordinate)
 62	})
 63
 64	t.Run("left_click_drag with start_coordinate and coordinate", func(t *testing.T) {
 65		t.Parallel()
 66		input, err := ParseComputerUseInput(`{"action":"left_click_drag","start_coordinate":[10,20],"coordinate":[300,400]}`)
 67		require.NoError(t, err)
 68		require.Equal(t, ActionLeftClickDrag, input.Action)
 69		require.Equal(t, [2]int64{10, 20}, input.StartCoordinate)
 70		require.Equal(t, [2]int64{300, 400}, input.Coordinate)
 71	})
 72
 73	t.Run("type with text", func(t *testing.T) {
 74		t.Parallel()
 75		input, err := ParseComputerUseInput(`{"action":"type","text":"hello world"}`)
 76		require.NoError(t, err)
 77		require.Equal(t, ActionType, input.Action)
 78		require.Equal(t, "hello world", input.Text)
 79	})
 80
 81	t.Run("key with text", func(t *testing.T) {
 82		t.Parallel()
 83		input, err := ParseComputerUseInput(`{"action":"key","text":"ctrl+c"}`)
 84		require.NoError(t, err)
 85		require.Equal(t, ActionKey, input.Action)
 86		require.Equal(t, "ctrl+c", input.Text)
 87	})
 88
 89	t.Run("scroll with coordinate direction and amount", func(t *testing.T) {
 90		t.Parallel()
 91		input, err := ParseComputerUseInput(`{"action":"scroll","coordinate":[960,540],"scroll_direction":"down","scroll_amount":3}`)
 92		require.NoError(t, err)
 93		require.Equal(t, ActionScroll, input.Action)
 94		require.Equal(t, [2]int64{960, 540}, input.Coordinate)
 95		require.Equal(t, "down", input.ScrollDirection)
 96		require.Equal(t, int64(3), input.ScrollAmount)
 97	})
 98
 99	t.Run("invalid JSON returns error", func(t *testing.T) {
100		t.Parallel()
101		_, err := ParseComputerUseInput(`{not valid json}`)
102		require.Error(t, err)
103	})
104
105	t.Run("triple_click with coordinate", func(t *testing.T) {
106		t.Parallel()
107		input, err := ParseComputerUseInput(`{"action":"triple_click","coordinate":[120,240]}`)
108		require.NoError(t, err)
109		require.Equal(t, ActionTripleClick, input.Action)
110		require.Equal(t, [2]int64{120, 240}, input.Coordinate)
111	})
112
113	t.Run("left_mouse_down with coordinate", func(t *testing.T) {
114		t.Parallel()
115		input, err := ParseComputerUseInput(`{"action":"left_mouse_down","coordinate":[80,90]}`)
116		require.NoError(t, err)
117		require.Equal(t, ActionLeftMouseDown, input.Action)
118		require.Equal(t, [2]int64{80, 90}, input.Coordinate)
119	})
120
121	t.Run("left_mouse_up with coordinate", func(t *testing.T) {
122		t.Parallel()
123		input, err := ParseComputerUseInput(`{"action":"left_mouse_up","coordinate":[80,90]}`)
124		require.NoError(t, err)
125		require.Equal(t, ActionLeftMouseUp, input.Action)
126		require.Equal(t, [2]int64{80, 90}, input.Coordinate)
127	})
128
129	t.Run("wait", func(t *testing.T) {
130		t.Parallel()
131		input, err := ParseComputerUseInput(`{"action":"wait"}`)
132		require.NoError(t, err)
133		require.Equal(t, ActionWait, input.Action)
134		require.Equal(t, [2]int64{0, 0}, input.Coordinate)
135		require.Equal(t, "", input.Text)
136	})
137
138	t.Run("zoom with region", func(t *testing.T) {
139		t.Parallel()
140		input, err := ParseComputerUseInput(`{"action":"zoom","region":[100,200,500,600]}`)
141		require.NoError(t, err)
142		require.Equal(t, ActionZoom, input.Action)
143		require.Equal(t, [4]int64{100, 200, 500, 600}, input.Region)
144	})
145
146	t.Run("left_click with modifier key", func(t *testing.T) {
147		t.Parallel()
148		input, err := ParseComputerUseInput(`{"action":"left_click","coordinate":[100,200],"text":"shift"}`)
149		require.NoError(t, err)
150		require.Equal(t, ActionLeftClick, input.Action)
151		require.Equal(t, [2]int64{100, 200}, input.Coordinate)
152		require.Equal(t, "shift", input.Text)
153	})
154
155	t.Run("unknown action parses without error", func(t *testing.T) {
156		t.Parallel()
157		input, err := ParseComputerUseInput(`{"action":"future_action","coordinate":[1,2]}`)
158		require.NoError(t, err)
159		require.Equal(t, ComputerAction("future_action"), input.Action)
160		require.Equal(t, [2]int64{1, 2}, input.Coordinate)
161	})
162
163	t.Run("hold_key with duration", func(t *testing.T) {
164		t.Parallel()
165		input, err := ParseComputerUseInput(`{"action":"hold_key","text":"shift","duration":2}`)
166		require.NoError(t, err)
167		require.Equal(t, ActionHoldKey, input.Action)
168		require.Equal(t, "shift", input.Text)
169		require.Equal(t, int64(2), input.Duration)
170	})
171}
172
173func TestNewComputerUseScreenshotResult(t *testing.T) {
174	t.Parallel()
175
176	t.Run("base64 encodes PNG bytes", func(t *testing.T) {
177		t.Parallel()
178		pngData := []byte{0x89, 0x50, 0x4E, 0x47, 0x0D, 0x0A}
179		result := NewComputerUseScreenshotResult("call-123", pngData)
180
181		require.Equal(t, "call-123", result.ToolCallID)
182
183		media, ok := result.Output.(fantasy.ToolResultOutputContentMedia)
184		require.True(t, ok, "output should be ToolResultOutputContentMedia")
185		require.Equal(t, "image/png", media.MediaType)
186		require.Equal(t, base64.StdEncoding.EncodeToString(pngData), media.Data)
187	})
188
189	t.Run("preserves tool call ID", func(t *testing.T) {
190		t.Parallel()
191		result := NewComputerUseScreenshotResult("tc_abc", []byte{0x01})
192		require.Equal(t, "tc_abc", result.ToolCallID)
193	})
194
195	t.Run("empty screenshot bytes", func(t *testing.T) {
196		t.Parallel()
197		result := NewComputerUseScreenshotResult("call-empty", []byte{})
198
199		media, ok := result.Output.(fantasy.ToolResultOutputContentMedia)
200		require.True(t, ok)
201		require.Equal(t, "image/png", media.MediaType)
202		require.Equal(t, "", media.Data)
203	})
204
205	t.Run("output content type is media", func(t *testing.T) {
206		t.Parallel()
207		result := NewComputerUseScreenshotResult("call-type", []byte{0xFF})
208		require.Equal(t, fantasy.ToolResultContentTypeMedia, result.Output.GetType())
209	})
210}
211
212func TestNewComputerUseScreenshotResultWithMediaType(t *testing.T) {
213	t.Parallel()
214
215	t.Run("custom media type and base64 data", func(t *testing.T) {
216		t.Parallel()
217		b64 := base64.StdEncoding.EncodeToString([]byte("jpeg-data"))
218		result := NewComputerUseScreenshotResultWithMediaType("call-456", b64, "image/jpeg")
219
220		require.Equal(t, "call-456", result.ToolCallID)
221
222		media, ok := result.Output.(fantasy.ToolResultOutputContentMedia)
223		require.True(t, ok, "output should be ToolResultOutputContentMedia")
224		require.Equal(t, "image/jpeg", media.MediaType)
225		require.Equal(t, b64, media.Data)
226	})
227
228	t.Run("preserves tool call ID", func(t *testing.T) {
229		t.Parallel()
230		result := NewComputerUseScreenshotResultWithMediaType("tc_xyz", "data", "image/webp")
231		require.Equal(t, "tc_xyz", result.ToolCallID)
232	})
233
234	t.Run("output content type is media", func(t *testing.T) {
235		t.Parallel()
236		result := NewComputerUseScreenshotResultWithMediaType("call-type", "data", "image/png")
237		require.Equal(t, fantasy.ToolResultContentTypeMedia, result.Output.GetType())
238	})
239}
240
241func TestNewComputerUseErrorResult(t *testing.T) {
242	t.Parallel()
243
244	t.Run("error message propagates", func(t *testing.T) {
245		t.Parallel()
246		err := errors.New("screenshot capture failed")
247		result := NewComputerUseErrorResult("call-err", err)
248
249		require.Equal(t, "call-err", result.ToolCallID)
250
251		errOutput, ok := result.Output.(fantasy.ToolResultOutputContentError)
252		require.True(t, ok, "output should be ToolResultOutputContentError")
253		require.Equal(t, "screenshot capture failed", errOutput.Error.Error())
254	})
255
256	t.Run("preserves tool call ID", func(t *testing.T) {
257		t.Parallel()
258		result := NewComputerUseErrorResult("tc_err", errors.New("fail"))
259		require.Equal(t, "tc_err", result.ToolCallID)
260	})
261
262	t.Run("output content type is error", func(t *testing.T) {
263		t.Parallel()
264		result := NewComputerUseErrorResult("call-type", errors.New("oops"))
265		require.Equal(t, fantasy.ToolResultContentTypeError, result.Output.GetType())
266	})
267}
268
269func TestNewComputerUseTextResult(t *testing.T) {
270	t.Parallel()
271
272	t.Run("text content is set", func(t *testing.T) {
273		t.Parallel()
274		result := NewComputerUseTextResult("call-txt", "action completed successfully")
275
276		require.Equal(t, "call-txt", result.ToolCallID)
277
278		textOutput, ok := result.Output.(fantasy.ToolResultOutputContentText)
279		require.True(t, ok, "output should be ToolResultOutputContentText")
280		require.Equal(t, "action completed successfully", textOutput.Text)
281	})
282
283	t.Run("preserves tool call ID", func(t *testing.T) {
284		t.Parallel()
285		result := NewComputerUseTextResult("tc_text", "hello")
286		require.Equal(t, "tc_text", result.ToolCallID)
287	})
288
289	t.Run("empty text", func(t *testing.T) {
290		t.Parallel()
291		result := NewComputerUseTextResult("call-empty", "")
292
293		textOutput, ok := result.Output.(fantasy.ToolResultOutputContentText)
294		require.True(t, ok)
295		require.Equal(t, "", textOutput.Text)
296	})
297
298	t.Run("output content type is text", func(t *testing.T) {
299		t.Parallel()
300		result := NewComputerUseTextResult("call-type", "test")
301		require.Equal(t, fantasy.ToolResultContentTypeText, result.Output.GetType())
302	})
303}