main.go

  1package main
  2
  3// This example demonstrates Anthropic computer use with the agent
  4// helper. It shows how to:
  5//
  6//  1. Wire up the provider, model, and computer use tool.
  7//  2. Register the tool via WithProviderDefinedTools so the agent
  8//     handles the tool-call loop automatically.
  9//  3. Parse incoming tool calls with ParseComputerUseInput inside
 10//     the Run function.
 11//  4. Return results (screenshots, errors) back to the agent.
 12
 13import (
 14	"bytes"
 15	"context"
 16	"fmt"
 17	"image"
 18	"image/color"
 19	"image/png"
 20	"os"
 21
 22	"charm.land/fantasy"
 23	"charm.land/fantasy/providers/anthropic"
 24)
 25
 26// takeScreenshot is a stub that simulates capturing a screenshot.
 27// In a real implementation this would capture the virtual display
 28// and return raw PNG bytes.
 29func takeScreenshot() ([]byte, error) {
 30	// Generate a valid 1x1 black PNG as a placeholder.
 31	img := image.NewRGBA(image.Rect(0, 0, 1, 1))
 32	img.Set(0, 0, color.Black)
 33	var buf bytes.Buffer
 34	if err := png.Encode(&buf, img); err != nil {
 35		return nil, err
 36	}
 37	return buf.Bytes(), nil
 38}
 39
 40func main() {
 41	// Set up the Anthropic provider.
 42	provider, err := anthropic.New(anthropic.WithAPIKey(os.Getenv("ANTHROPIC_API_KEY")))
 43	if err != nil {
 44		fmt.Fprintln(os.Stderr, "could not create provider:", err)
 45		os.Exit(1)
 46	}
 47
 48	ctx := context.Background()
 49
 50	// Pick the model.
 51	model, err := provider.LanguageModel(ctx, "claude-opus-4-6")
 52	if err != nil {
 53		fmt.Fprintln(os.Stderr, "could not get language model:", err)
 54		os.Exit(1)
 55	}
 56
 57	// Create a computer use tool with a Run function that executes
 58	// actions and returns screenshots.
 59	computerTool := anthropic.NewComputerUseTool(anthropic.ComputerUseToolOptions{
 60		DisplayWidthPx:  1920,
 61		DisplayHeightPx: 1080,
 62		ToolVersion:     anthropic.ComputerUse20251124,
 63	}, func(ctx context.Context, call fantasy.ToolCall) (fantasy.ToolResponse, error) {
 64		action, err := anthropic.ParseComputerUseInput(call.Input)
 65		if err != nil {
 66			return fantasy.ToolResponse{}, fmt.Errorf("parse computer use input: %w", err)
 67		}
 68
 69		fmt.Printf("Action: %s\n", action.Action)
 70
 71		// In production you would execute the action (click,
 72		// type, scroll, etc.) against the virtual display and
 73		// then capture a screenshot.
 74		png, err := takeScreenshot()
 75		if err != nil {
 76			return fantasy.ToolResponse{}, fmt.Errorf("take screenshot: %w", err)
 77		}
 78		return fantasy.NewImageResponse(png, "image/png"), nil
 79	})
 80
 81	// Build an agent with the computer use tool. The agent handles
 82	// the tool-call loop: it sends the prompt, executes any tool
 83	// calls the model returns, feeds the results back, and repeats
 84	// until the model stops requesting tools.
 85	agent := fantasy.NewAgent(model,
 86		fantasy.WithProviderDefinedTools(computerTool),
 87		fantasy.WithStopConditions(fantasy.StepCountIs(10)),
 88	)
 89
 90	result, err := agent.Generate(ctx, fantasy.AgentCall{
 91		Prompt: "Take a screenshot of the desktop",
 92	})
 93	if err != nil {
 94		fmt.Fprintln(os.Stderr, "agent error:", err)
 95		os.Exit(1)
 96	}
 97
 98	fmt.Println("Agent finished.")
 99	fmt.Printf("Steps: %d\n", len(result.Steps))
100	if text := result.Response.Content.Text(); text != "" {
101		fmt.Println("Claude said:", text)
102	}
103}