image_upload_test.go

  1package providertests
  2
  3import (
  4	"cmp"
  5	"net/http"
  6	"os"
  7	"testing"
  8
  9	"charm.land/fantasy"
 10	"charm.land/fantasy/providers/anthropic"
 11	"charm.land/fantasy/providers/google"
 12	"charm.land/fantasy/providers/openai"
 13	"github.com/stretchr/testify/require"
 14	"gopkg.in/dnaeon/go-vcr.v4/pkg/recorder"
 15)
 16
 17func anthropicImageBuilder(model string) builderFunc {
 18	return func(t *testing.T, r *recorder.Recorder) (fantasy.LanguageModel, error) {
 19		provider, err := anthropic.New(
 20			anthropic.WithAPIKey(cmp.Or(os.Getenv("FANTASY_ANTHROPIC_API_KEY"), "(missing)")),
 21			anthropic.WithHTTPClient(&http.Client{Transport: r}),
 22		)
 23		if err != nil {
 24			return nil, err
 25		}
 26		return provider.LanguageModel(t.Context(), model)
 27	}
 28}
 29
 30func openAIImageBuilder(model string) builderFunc {
 31	return func(t *testing.T, r *recorder.Recorder) (fantasy.LanguageModel, error) {
 32		provider, err := openai.New(
 33			openai.WithAPIKey(cmp.Or(os.Getenv("FANTASY_OPENAI_API_KEY"), "(missing)")),
 34			openai.WithHTTPClient(&http.Client{Transport: r}),
 35		)
 36		if err != nil {
 37			return nil, err
 38		}
 39		return provider.LanguageModel(t.Context(), model)
 40	}
 41}
 42
 43func geminiImageBuilder(model string) builderFunc {
 44	return func(t *testing.T, r *recorder.Recorder) (fantasy.LanguageModel, error) {
 45		provider, err := google.New(
 46			google.WithGeminiAPIKey(cmp.Or(os.Getenv("FANTASY_GEMINI_API_KEY"), "(missing)")),
 47			google.WithHTTPClient(&http.Client{Transport: r}),
 48		)
 49		if err != nil {
 50			return nil, err
 51		}
 52		return provider.LanguageModel(t.Context(), model)
 53	}
 54}
 55
 56func TestImageUploadAgent(t *testing.T) {
 57	pairs := []builderPair{
 58		{
 59			name:    "anthropic-claude-sonnet-4",
 60			builder: anthropicImageBuilder("claude-sonnet-4-20250514"),
 61		},
 62		{
 63			name:    "openai-gpt-5",
 64			builder: openAIImageBuilder("gpt-5"),
 65		},
 66		{
 67			name:    "gemini-2.5-pro",
 68			builder: geminiImageBuilder("gemini-2.5-pro"),
 69		},
 70	}
 71
 72	img, err := os.ReadFile("testdata/wish.png")
 73	require.NoError(t, err)
 74
 75	file := fantasy.FilePart{Filename: "wish.png", Data: img, MediaType: "image/png"}
 76
 77	for _, pair := range pairs {
 78		t.Run(pair.name, func(t *testing.T) {
 79			r := newRecorder(t)
 80
 81			lm, err := pair.builder(t, r)
 82			require.NoError(t, err)
 83
 84			agent := fantasy.NewAgent(
 85				lm,
 86				fantasy.WithSystemPrompt("You are a helpful assistant"),
 87			)
 88
 89			result, err := agent.Generate(t.Context(), fantasy.AgentCall{
 90				Prompt:          "Describe the image briefly in English.",
 91				Files:           []fantasy.FilePart{file},
 92				ProviderOptions: pair.providerOptions,
 93				MaxOutputTokens: fantasy.Opt(int64(4000)),
 94			})
 95			require.NoError(t, err)
 96			got := result.Response.Content.Text()
 97			require.NotEmpty(t, got, "expected non-empty description for %s", pair.name)
 98		})
 99	}
100}
101
102func TestImageUploadAgentStreaming(t *testing.T) {
103	pairs := []builderPair{
104		{
105			name:    "anthropic-claude-sonnet-4",
106			builder: anthropicImageBuilder("claude-sonnet-4-20250514"),
107		},
108		{
109			name:    "openai-gpt-5",
110			builder: openAIImageBuilder("gpt-5"),
111		},
112		{
113			name:    "gemini-2.5-pro",
114			builder: geminiImageBuilder("gemini-2.5-pro"),
115		},
116	}
117
118	img, err := os.ReadFile("testdata/wish.png")
119	require.NoError(t, err)
120
121	file := fantasy.FilePart{Filename: "wish.png", Data: img, MediaType: "image/png"}
122
123	for _, pair := range pairs {
124		t.Run(pair.name+"-stream", func(t *testing.T) {
125			r := newRecorder(t)
126
127			lm, err := pair.builder(t, r)
128			require.NoError(t, err)
129
130			agent := fantasy.NewAgent(
131				lm,
132				fantasy.WithSystemPrompt("You are a helpful assistant"),
133			)
134
135			result, err := agent.Stream(t.Context(), fantasy.AgentStreamCall{
136				Prompt:          "Describe the image briefly in English.",
137				Files:           []fantasy.FilePart{file},
138				ProviderOptions: pair.providerOptions,
139				MaxOutputTokens: fantasy.Opt(int64(4000)),
140			})
141			require.NoError(t, err)
142			got := result.Response.Content.Text()
143			require.NotEmpty(t, got, "expected non-empty description for %s", pair.name)
144		})
145	}
146}