oai_responses_test.go

package oai

import (
	"context"
	"encoding/json"
	"net/http"
	"net/http/httptest"
	"os"
	"testing"

	"shelley.exe.dev/llm"
)

func TestResponsesServiceBasic(t *testing.T) {
	// This is a basic compile-time test to ensure ResponsesService implements llm.Service
	var _ llm.Service = (*ResponsesService)(nil)
}

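// TestFromLLMMessageResponses checks that llm.Message values are converted into
// the expected number and shape of Responses API input items.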
func TestFromLLMMessageResponses(t *testing.T) {
	tests := []struct {
		name     string
		msg      llm.Message
		expected int // expected number of output items
	}{
		{
			name: "simple user message",
			msg: llm.Message{
				Role: llm.MessageRoleUser,
				Content: []llm.Content{
					{Type: llm.ContentTypeText, Text: "Hello"},
				},
			},
			expected: 1,
		},
		{
			name: "assistant message with text",
			msg: llm.Message{
				Role: llm.MessageRoleAssistant,
				Content: []llm.Content{
					{Type: llm.ContentTypeText, Text: "Hi there"},
				},
			},
			expected: 1,
		},
		{
			name: "message with tool use",
			msg: llm.Message{
				Role: llm.MessageRoleAssistant,
				Content: []llm.Content{
					{
						Type:      llm.ContentTypeToolUse,
						ID:        "call_123",
						ToolName:  "get_weather",
						ToolInput: json.RawMessage(`{"location":"SF"}`),
					},
				},
			},
			expected: 1,
		},
		{
			name: "message with tool result",
			msg: llm.Message{
				Role: llm.MessageRoleUser,
				Content: []llm.Content{
					{
						Type:      llm.ContentTypeToolResult,
						ToolUseID: "call_123",
						ToolResult: []llm.Content{
							{Type: llm.ContentTypeText, Text: "72 degrees"},
						},
					},
				},
			},
			expected: 1,
		},
		{
			name: "message with text and tool use",
			msg: llm.Message{
				Role: llm.MessageRoleAssistant,
				Content: []llm.Content{
					{Type: llm.ContentTypeText, Text: "Let me check"},
					{
						Type:      llm.ContentTypeToolUse,
						ID:        "call_123",
						ToolName:  "get_weather",
						ToolInput: json.RawMessage(`{"location":"SF"}`),
					},
				},
			},
			expected: 2, // one message item, one function_call item
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			items := fromLLMMessageResponses(tt.msg)
			if len(items) != tt.expected {
				t.Errorf("expected %d items, got %d", tt.expected, len(items))
			}

			// Verify structure based on content type
			for _, item := range items {
				switch item.Type {
				case "message":
					if item.Role == "" {
						t.Error("message item missing role")
					}
					if len(item.Content) == 0 {
						t.Error("message item has no content")
					}
				case "function_call":
					if item.CallID == "" {
						t.Error("function_call item missing call_id")
					}
					if item.Name == "" {
						t.Error("function_call item missing name")
					}
				case "function_call_output":
					if item.CallID == "" {
						t.Error("function_call_output item missing call_id")
					}
				}
			}
		})
	}
}

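// TestFromLLMToolResponses checks that an llm.Tool is converted into a Responses API
// function tool with its name, description, and parameters intact.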
func TestFromLLMToolResponses(t *testing.T) {
	tool := &llm.Tool{
		Name:        "test_tool",
		Description: "A test tool",
		InputSchema: llm.MustSchema(`{
			"type": "object",
			"properties": {
				"param": {"type": "string"}
			}
		}`),
	}

	rtool := fromLLMToolResponses(tool)

	if rtool.Type != "function" {
		t.Errorf("expected type 'function', got %s", rtool.Type)
	}
	if rtool.Name != "test_tool" {
		t.Errorf("expected name 'test_tool', got %s", rtool.Name)
	}
	if rtool.Description != "A test tool" {
		t.Errorf("expected description 'A test tool', got %s", rtool.Description)
	}
	if len(rtool.Parameters) == 0 {
		t.Error("expected parameters to be set")
	}
}

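// TestFromLLMSystemResponses checks how system content is converted into Responses API
// input items, including the case where multiple entries are combined into one message.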
func TestFromLLMSystemResponses(t *testing.T) {
	tests := []struct {
		name     string
		system   []llm.SystemContent
		expected int
	}{
		{
			name:     "empty system",
			system:   []llm.SystemContent{},
			expected: 0,
		},
		{
			name: "single system message",
			system: []llm.SystemContent{
				{Text: "You are a helpful assistant"},
			},
			expected: 1,
		},
		{
			name: "multiple system messages",
			system: []llm.SystemContent{
				{Text: "You are a helpful assistant"},
				{Text: "Be concise"},
			},
			expected: 1, // should be combined into one message
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			items := fromLLMSystemResponses(tt.system)
			if len(items) != tt.expected {
				t.Errorf("expected %d items, got %d", tt.expected, len(items))
			}
		})
	}
}

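// TestToLLMResponseFromResponses checks that a Responses API response is mapped back
// to an llm.Response with the expected ID, model, stop reason, and content items.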
func TestToLLMResponseFromResponses(t *testing.T) {
	svc := &ResponsesService{}

	tests := []struct {
		name           string
		resp           *responsesResponse
		expectedReason llm.StopReason
		contentCount   int
	}{
		{
			name: "simple text response",
			resp: &responsesResponse{
				ID:    "resp_123",
				Model: "gpt-5.1-codex",
				Output: []responsesOutputItem{
					{
						Type: "message",
						Role: "assistant",
						Content: []responsesContent{
							{Type: "output_text", Text: "Hello!"},
						},
					},
				},
			},
			expectedReason: llm.StopReasonStopSequence,
			contentCount:   1,
		},
		{
			name: "response with function call",
			resp: &responsesResponse{
				ID:    "resp_123",
				Model: "gpt-5.1-codex",
				Output: []responsesOutputItem{
					{
						Type:      "function_call",
						CallID:    "call_123",
						Name:      "get_weather",
						Arguments: `{"location":"SF"}`,
					},
				},
			},
			expectedReason: llm.StopReasonToolUse,
			contentCount:   1,
		},
		{
			name: "response with reasoning and message",
			resp: &responsesResponse{
				ID:    "resp_123",
				Model: "gpt-5.1-codex",
				Output: []responsesOutputItem{
					{
						Type:    "reasoning",
						Summary: []string{"Let me think", "about this"},
					},
					{
						Type: "message",
						Role: "assistant",
						Content: []responsesContent{
							{Type: "output_text", Text: "Here's the answer"},
						},
					},
				},
			},
			expectedReason: llm.StopReasonStopSequence,
			contentCount:   2, // reasoning + text
		},
	}

	for _, tt := range tests {
		t.Run(tt.name, func(t *testing.T) {
			llmResp := svc.toLLMResponseFromResponses(tt.resp, nil)

			if llmResp.ID != tt.resp.ID {
				t.Errorf("expected ID %s, got %s", tt.resp.ID, llmResp.ID)
			}
			if llmResp.Model != tt.resp.Model {
				t.Errorf("expected model %s, got %s", tt.resp.Model, llmResp.Model)
			}
			if llmResp.StopReason != tt.expectedReason {
				t.Errorf("expected stop reason %v, got %v", tt.expectedReason, llmResp.StopReason)
			}
			if len(llmResp.Content) != tt.contentCount {
				t.Errorf("expected %d content items, got %d", tt.contentCount, len(llmResp.Content))
			}
		})
	}
}

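// TestResponsesServiceTokenContextWindow checks the context window size reported for each model.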
func TestResponsesServiceTokenContextWindow(t *testing.T) {
	tests := []struct {
		model    Model
		expected int
	}{
		{model: GPT53Codex, expected: 288000},
		{model: GPT52Codex, expected: 272000},
		{model: GPT5Codex, expected: 256000},
		{model: GPT41, expected: 200000},
		{model: GPT4o, expected: 128000},
	}

	for _, tt := range tests {
		t.Run(tt.model.UserName, func(t *testing.T) {
			svc := &ResponsesService{Model: tt.model}
			got := svc.TokenContextWindow()
			if got != tt.expected {
				t.Errorf("expected %d, got %d", tt.expected, got)
			}
		})
	}
}

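// TestResponsesServiceConfigDetails checks the configuration details reported by the service.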
func TestResponsesServiceConfigDetails(t *testing.T) {
	svc := &ResponsesService{
		Model:  GPT5Codex,
		APIKey: "test-key",
	}

	details := svc.ConfigDetails()

	if details["model_name"] != "gpt-5.1-codex" {
		t.Errorf("expected model_name 'gpt-5.1-codex', got %s", details["model_name"])
	}
	if details["full_url"] != "https://api.openai.com/v1/responses" {
		t.Errorf("unexpected full_url: %s", details["full_url"])
	}
	if details["has_api_key_set"] != "true" {
		t.Error("expected has_api_key_set to be true")
	}
}

// TestResponsesServiceIntegration is a live test that requires OPENAI_API_KEY.
// Run with: go test -v -run TestResponsesServiceIntegration
func TestResponsesServiceIntegration(t *testing.T) {
	if testing.Short() {
		t.Skip("skipping integration test in short mode")
	}

	apiKey := os.Getenv(OpenAIAPIKeyEnv)
	if apiKey == "" {
		t.Skip("OPENAI_API_KEY not set, skipping integration test")
	}

	svc := &ResponsesService{
		APIKey: apiKey,
		Model:  GPT5Codex,
	}

	ctx := context.Background()

	t.Run("simple request", func(t *testing.T) {
		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "Say 'hello' and nothing else"},
					},
				},
			},
		}

		resp, err := svc.Do(ctx, req)
		if err != nil {
			t.Fatalf("request failed: %v", err)
		}

		if resp.ID == "" {
			t.Error("expected response ID to be set")
		}
		if resp.Model != "gpt-5.1-codex" {
			t.Errorf("expected model gpt-5.1-codex, got %s", resp.Model)
		}
		if len(resp.Content) == 0 {
			t.Error("expected response to have content")
		}
	})

	t.Run("request with tools", func(t *testing.T) {
		req := &llm.Request{
			Messages: []llm.Message{
				{
					Role: llm.MessageRoleUser,
					Content: []llm.Content{
						{Type: llm.ContentTypeText, Text: "What's the weather in Paris?"},
					},
				},
			},
			Tools: []*llm.Tool{
				{
					Name:        "get_weather",
					Description: "Get weather for a location",
					InputSchema: llm.MustSchema(`{
						"type": "object",
						"properties": {
							"location": {"type": "string"}
						},
						"required": ["location"]
					}`),
				},
			},
		}

		resp, err := svc.Do(ctx, req)
		if err != nil {
			t.Fatalf("request failed: %v", err)
		}

		if resp.StopReason != llm.StopReasonToolUse {
			t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
		}

		// Find the tool use content
		var foundToolUse bool
		for _, c := range resp.Content {
			if c.Type == llm.ContentTypeToolUse {
				foundToolUse = true
				if c.ToolName != "get_weather" {
					t.Errorf("expected tool name get_weather, got %s", c.ToolName)
				}
			}
		}
		if !foundToolUse {
			t.Error("expected to find tool use in response")
		}
	})
}

// Test system content with all empty text (should return nil)
func TestFromLLMSystemResponsesAllEmpty(t *testing.T) {
	items := fromLLMSystemResponses([]llm.SystemContent{
		{Text: ""},
		{Text: ""},
		{Text: ""},
	})
	if items != nil {
		t.Errorf("fromLLMSystemResponses(all empty) = %v, expected nil", items)
	}
}

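// TestResponsesServiceDo exercises Do against a mock Responses server and verifies
// the request path, the Authorization header, and the decoded response.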
func TestResponsesServiceDo(t *testing.T) {
	// Create a mock Responses server
	server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
		if r.URL.Path != "/responses" {
			t.Errorf("Expected path /responses, got %s", r.URL.Path)
		}
		if r.Header.Get("Authorization") != "Bearer test-api-key" {
			t.Errorf("Expected Authorization 'Bearer test-api-key', got %s", r.Header.Get("Authorization"))
		}

		// Send a mock response
		response := responsesResponse{
			ID:    "responses-test123",
			Model: "test-model",
			Output: []responsesOutputItem{
				{
					Type: "message",
					Role: "assistant",
					Content: []responsesContent{
						{
							Type: "text",
							Text: "Hello! How can I help you today?",
						},
					},
				},
			},
			Usage: responsesUsage{
				InputTokens:  10,
				OutputTokens: 20,
			},
		}

		w.Header().Set("Content-Type", "application/json")
		json.NewEncoder(w).Encode(response)
	}))
	defer server.Close()

	// Create a service with the mock server
	ctx := context.Background()
	svc := &ResponsesService{
		APIKey:   "test-api-key",
		Model:    GPT41,
		ModelURL: server.URL,
	}

	// Create a test request
	req := &llm.Request{
		Messages: []llm.Message{
			{
				Role: llm.MessageRoleUser,
				Content: []llm.Content{
					{Type: llm.ContentTypeText, Text: "Hello!"},
				},
			},
		},
	}

	// Call the Do method
	resp, err := svc.Do(ctx, req)
	if err != nil {
		t.Fatalf("Do() error = %v", err)
	}

	// Verify the response
	if resp == nil {
		t.Fatal("Do() returned nil response")
	}
	if resp.Role != llm.MessageRoleAssistant {
		t.Errorf("resp.Role = %v, expected %v", resp.Role, llm.MessageRoleAssistant)
	}
	if len(resp.Content) != 1 {
		t.Errorf("resp.Content length = %d, expected 1", len(resp.Content))
	} else {
		content := resp.Content[0]
		if content.Type != llm.ContentTypeText {
			t.Errorf("content.Type = %v, expected %v", content.Type, llm.ContentTypeText)
		}
		if content.Text != "Hello! How can I help you today?" {
			t.Errorf("content.Text = %q, expected %q", content.Text, "Hello! How can I help you today?")
		}
	}
	if resp.Usage.InputTokens != 10 {
		t.Errorf("resp.Usage.InputTokens = %d, expected 10", resp.Usage.InputTokens)
	}
	if resp.Usage.OutputTokens != 20 {
		t.Errorf("resp.Usage.OutputTokens = %d, expected 20", resp.Usage.OutputTokens)
	}
}