1package main
2
3// This example demonstrates Anthropic computer use with the agent
4// helper. It shows how to:
5//
6// 1. Wire up the provider, model, and computer use tool.
7// 2. Register the tool via WithProviderDefinedTools so the agent
8// handles the tool-call loop automatically.
9// 3. Parse incoming tool calls with ParseComputerUseInput inside
10// the Run function.
11// 4. Return results (screenshots, errors) back to the agent.
12
13import (
14 "bytes"
15 "context"
16 "fmt"
17 "image"
18 "image/color"
19 "image/png"
20 "os"
21
22 "charm.land/fantasy"
23 "charm.land/fantasy/providers/anthropic"
24)
25
// takeScreenshot stands in for a real screen capture. It renders a
// 1x1 opaque black image and returns it PNG-encoded; a real
// implementation would capture the virtual display instead.
//
// The returned error is whatever png.Encode reports, unwrapped.
func takeScreenshot() ([]byte, error) {
	// Placeholder canvas: a single pixel, painted opaque black.
	placeholder := image.NewRGBA(image.Rect(0, 0, 1, 1))
	placeholder.SetRGBA(0, 0, color.RGBA{A: 0xff})

	encoded := new(bytes.Buffer)
	err := png.Encode(encoded, placeholder)
	if err != nil {
		return nil, err
	}
	return encoded.Bytes(), nil
}
39
40func main() {
41 // Set up the Anthropic provider.
42 provider, err := anthropic.New(anthropic.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY")))
43 if err != nil {
44 fmt.Fprintln(os.Stderr, "could not create provider:", err)
45 os.Exit(1)
46 }
47
48 ctx := context.Background()
49
50 // Pick the model.
51 model, err := provider.LanguageModel(ctx, "claude-opus-4-6")
52 if err != nil {
53 fmt.Fprintln(os.Stderr, "could not get language model:", err)
54 os.Exit(1)
55 }
56
57 // Create a computer use tool with a Run function that executes
58 // actions and returns screenshots.
59 computerTool := anthropic.NewComputerUseTool(anthropic.ComputerUseToolOptions{
60 DisplayWidthPx: 1920,
61 DisplayHeightPx: 1080,
62 ToolVersion: anthropic.ComputerUse20251124,
63 }, func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
64 action, err := anthropic.ParseComputerUseInput(call.Input)
65 if err != nil {
66 return fantasy.ToolResponse{}, fmt.Errorf("parse computer use input: %w", err)
67 }
68
69 fmt.Printf("Action: %s\n", action.Action)
70
71 // In production you would execute the action (click,
72 // type, scroll, etc.) against the virtual display and
73 // then capture a screenshot.
74 png, err := takeScreenshot()
75 if err != nil {
76 return fantasy.ToolResponse{}, fmt.Errorf("take screenshot: %w", err)
77 }
78 return fantasy.NewImageResponse(png, "image/png"), nil
79 })
80
81 // Build an agent with the computer use tool. The agent handles
82 // the tool-call loop: it sends the prompt, executes any tool
83 // calls the model returns, feeds the results back, and repeats
84 // until the model stops requesting tools.
85 agent := fantasy.NewAgent(model,
86 fantasy.WithProviderDefinedTools(computerTool),
87 fantasy.WithStopConditions(fantasy.StepCountIs(10)),
88 )
89
90 result, err := agent.Generate(ctx, fantasy.AgentCall{
91 Prompt: "Take a screenshot of the desktop",
92 })
93 if err != nil {
94 fmt.Fprintln(os.Stderr, "agent error:", err)
95 os.Exit(1)
96 }
97
98 fmt.Println("Agent finished.")
99 fmt.Printf("Steps: %d\n", len(result.Steps))
100 if text := result.Response.Content.Text(); text != "" {
101 fmt.Println("Claude said:", text)
102 }
103}