1package oai
2
3import (
4 "context"
5 "encoding/json"
6 "net/http"
7 "net/http/httptest"
8 "os"
9 "testing"
10
11 "shelley.exe.dev/llm"
12)
13
14func TestResponsesServiceBasic(t *testing.T) {
15 // This is a basic compile-time test to ensure ResponsesService implements llm.Service
16 var _ llm.Service = (*ResponsesService)(nil)
17}
18
19func TestFromLLMMessageResponses(t *testing.T) {
20 tests := []struct {
21 name string
22 msg llm.Message
23 expected int // expected number of output items
24 }{
25 {
26 name: "simple user message",
27 msg: llm.Message{
28 Role: llm.MessageRoleUser,
29 Content: []llm.Content{
30 {Type: llm.ContentTypeText, Text: "Hello"},
31 },
32 },
33 expected: 1,
34 },
35 {
36 name: "assistant message with text",
37 msg: llm.Message{
38 Role: llm.MessageRoleAssistant,
39 Content: []llm.Content{
40 {Type: llm.ContentTypeText, Text: "Hi there"},
41 },
42 },
43 expected: 1,
44 },
45 {
46 name: "message with tool use",
47 msg: llm.Message{
48 Role: llm.MessageRoleAssistant,
49 Content: []llm.Content{
50 {
51 Type: llm.ContentTypeToolUse,
52 ID: "call_123",
53 ToolName: "get_weather",
54 ToolInput: json.RawMessage(`{"location":"SF"}`),
55 },
56 },
57 },
58 expected: 1,
59 },
60 {
61 name: "message with tool result",
62 msg: llm.Message{
63 Role: llm.MessageRoleUser,
64 Content: []llm.Content{
65 {
66 Type: llm.ContentTypeToolResult,
67 ToolUseID: "call_123",
68 ToolResult: []llm.Content{
69 {Type: llm.ContentTypeText, Text: "72 degrees"},
70 },
71 },
72 },
73 },
74 expected: 1,
75 },
76 {
77 name: "message with text and tool use",
78 msg: llm.Message{
79 Role: llm.MessageRoleAssistant,
80 Content: []llm.Content{
81 {Type: llm.ContentTypeText, Text: "Let me check"},
82 {
83 Type: llm.ContentTypeToolUse,
84 ID: "call_123",
85 ToolName: "get_weather",
86 ToolInput: json.RawMessage(`{"location":"SF"}`),
87 },
88 },
89 },
90 expected: 2, // one message item, one function_call item
91 },
92 }
93
94 for _, tt := range tests {
95 t.Run(tt.name, func(t *testing.T) {
96 items := fromLLMMessageResponses(tt.msg)
97 if len(items) != tt.expected {
98 t.Errorf("expected %d items, got %d", tt.expected, len(items))
99 }
100
101 // Verify structure based on content type
102 for _, item := range items {
103 switch item.Type {
104 case "message":
105 if item.Role == "" {
106 t.Error("message item missing role")
107 }
108 if len(item.Content) == 0 {
109 t.Error("message item has no content")
110 }
111 case "function_call":
112 if item.CallID == "" {
113 t.Error("function_call item missing call_id")
114 }
115 if item.Name == "" {
116 t.Error("function_call item missing name")
117 }
118 case "function_call_output":
119 if item.CallID == "" {
120 t.Error("function_call_output item missing call_id")
121 }
122 }
123 }
124 })
125 }
126}
127
128func TestFromLLMToolResponses(t *testing.T) {
129 tool := &llm.Tool{
130 Name: "test_tool",
131 Description: "A test tool",
132 InputSchema: llm.MustSchema(`{
133 "type": "object",
134 "properties": {
135 "param": {"type": "string"}
136 }
137 }`),
138 }
139
140 rtool := fromLLMToolResponses(tool)
141
142 if rtool.Type != "function" {
143 t.Errorf("expected type 'function', got %s", rtool.Type)
144 }
145 if rtool.Name != "test_tool" {
146 t.Errorf("expected name 'test_tool', got %s", rtool.Name)
147 }
148 if rtool.Description != "A test tool" {
149 t.Errorf("expected description 'A test tool', got %s", rtool.Description)
150 }
151 if len(rtool.Parameters) == 0 {
152 t.Error("expected parameters to be set")
153 }
154}
155
156func TestFromLLMSystemResponses(t *testing.T) {
157 tests := []struct {
158 name string
159 system []llm.SystemContent
160 expected int
161 }{
162 {
163 name: "empty system",
164 system: []llm.SystemContent{},
165 expected: 0,
166 },
167 {
168 name: "single system message",
169 system: []llm.SystemContent{
170 {Text: "You are a helpful assistant"},
171 },
172 expected: 1,
173 },
174 {
175 name: "multiple system messages",
176 system: []llm.SystemContent{
177 {Text: "You are a helpful assistant"},
178 {Text: "Be concise"},
179 },
180 expected: 1, // should be combined into one message
181 },
182 }
183
184 for _, tt := range tests {
185 t.Run(tt.name, func(t *testing.T) {
186 items := fromLLMSystemResponses(tt.system)
187 if len(items) != tt.expected {
188 t.Errorf("expected %d items, got %d", len(items), tt.expected)
189 }
190 })
191 }
192}
193
194func TestToLLMResponseFromResponses(t *testing.T) {
195 svc := &ResponsesService{}
196
197 tests := []struct {
198 name string
199 resp *responsesResponse
200 expectedReason llm.StopReason
201 contentCount int
202 }{
203 {
204 name: "simple text response",
205 resp: &responsesResponse{
206 ID: "resp_123",
207 Model: "gpt-5.1-codex",
208 Output: []responsesOutputItem{
209 {
210 Type: "message",
211 Role: "assistant",
212 Content: []responsesContent{
213 {Type: "output_text", Text: "Hello!"},
214 },
215 },
216 },
217 },
218 expectedReason: llm.StopReasonStopSequence,
219 contentCount: 1,
220 },
221 {
222 name: "response with function call",
223 resp: &responsesResponse{
224 ID: "resp_123",
225 Model: "gpt-5.1-codex",
226 Output: []responsesOutputItem{
227 {
228 Type: "function_call",
229 CallID: "call_123",
230 Name: "get_weather",
231 Arguments: `{"location":"SF"}`,
232 },
233 },
234 },
235 expectedReason: llm.StopReasonToolUse,
236 contentCount: 1,
237 },
238 {
239 name: "response with reasoning and message",
240 resp: &responsesResponse{
241 ID: "resp_123",
242 Model: "gpt-5.1-codex",
243 Output: []responsesOutputItem{
244 {
245 Type: "reasoning",
246 Summary: []string{"Let me think", "about this"},
247 },
248 {
249 Type: "message",
250 Role: "assistant",
251 Content: []responsesContent{
252 {Type: "output_text", Text: "Here's the answer"},
253 },
254 },
255 },
256 },
257 expectedReason: llm.StopReasonStopSequence,
258 contentCount: 2, // reasoning + text
259 },
260 }
261
262 for _, tt := range tests {
263 t.Run(tt.name, func(t *testing.T) {
264 llmResp := svc.toLLMResponseFromResponses(tt.resp, nil)
265
266 if llmResp.ID != tt.resp.ID {
267 t.Errorf("expected ID %s, got %s", tt.resp.ID, llmResp.ID)
268 }
269 if llmResp.Model != tt.resp.Model {
270 t.Errorf("expected model %s, got %s", tt.resp.Model, llmResp.Model)
271 }
272 if llmResp.StopReason != tt.expectedReason {
273 t.Errorf("expected stop reason %v, got %v", tt.expectedReason, llmResp.StopReason)
274 }
275 if len(llmResp.Content) != tt.contentCount {
276 t.Errorf("expected %d content items, got %d", tt.contentCount, len(llmResp.Content))
277 }
278 })
279 }
280}
281
282func TestResponsesServiceTokenContextWindow(t *testing.T) {
283 tests := []struct {
284 model Model
285 expected int
286 }{
287 {model: GPT53Codex, expected: 288000},
288 {model: GPT52Codex, expected: 272000},
289 {model: GPT5Codex, expected: 256000},
290 {model: GPT41, expected: 200000},
291 {model: GPT4o, expected: 128000},
292 }
293
294 for _, tt := range tests {
295 t.Run(tt.model.UserName, func(t *testing.T) {
296 svc := &ResponsesService{Model: tt.model}
297 got := svc.TokenContextWindow()
298 if got != tt.expected {
299 t.Errorf("expected %d, got %d", tt.expected, got)
300 }
301 })
302 }
303}
304
305func TestResponsesServiceConfigDetails(t *testing.T) {
306 svc := &ResponsesService{
307 Model: GPT5Codex,
308 APIKey: "test-key",
309 }
310
311 details := svc.ConfigDetails()
312
313 if details["model_name"] != "gpt-5.1-codex" {
314 t.Errorf("expected model_name 'gpt-5.1-codex', got %s", details["model_name"])
315 }
316 if details["full_url"] != "https://api.openai.com/v1/responses" {
317 t.Errorf("unexpected full_url: %s", details["full_url"])
318 }
319 if details["has_api_key_set"] != "true" {
320 t.Error("expected has_api_key_set to be true")
321 }
322}
323
324// TestResponsesServiceIntegration is a live test that requires OPENAI_API_KEY
325// Run with: go test -v -run TestResponsesServiceIntegration
326func TestResponsesServiceIntegration(t *testing.T) {
327 if testing.Short() {
328 t.Skip("skipping integration test in short mode")
329 }
330
331 apiKey := os.Getenv(OpenAIAPIKeyEnv)
332 if apiKey == "" {
333 t.Skip("OPENAI_API_KEY not set, skipping integration test")
334 }
335
336 svc := &ResponsesService{
337 APIKey: apiKey,
338 Model: GPT5Codex,
339 }
340
341 ctx := context.Background()
342
343 t.Run("simple request", func(t *testing.T) {
344 req := &llm.Request{
345 Messages: []llm.Message{
346 {
347 Role: llm.MessageRoleUser,
348 Content: []llm.Content{
349 {Type: llm.ContentTypeText, Text: "Say 'hello' and nothing else"},
350 },
351 },
352 },
353 }
354
355 resp, err := svc.Do(ctx, req)
356 if err != nil {
357 t.Fatalf("request failed: %v", err)
358 }
359
360 if resp.ID == "" {
361 t.Error("expected response ID to be set")
362 }
363 if resp.Model != "gpt-5.1-codex" {
364 t.Errorf("expected model gpt-5.1-codex, got %s", resp.Model)
365 }
366 if len(resp.Content) == 0 {
367 t.Error("expected response to have content")
368 }
369 })
370
371 t.Run("request with tools", func(t *testing.T) {
372 req := &llm.Request{
373 Messages: []llm.Message{
374 {
375 Role: llm.MessageRoleUser,
376 Content: []llm.Content{
377 {Type: llm.ContentTypeText, Text: "What's the weather in Paris?"},
378 },
379 },
380 },
381 Tools: []*llm.Tool{
382 {
383 Name: "get_weather",
384 Description: "Get weather for a location",
385 InputSchema: llm.MustSchema(`{
386 "type": "object",
387 "properties": {
388 "location": {"type": "string"}
389 },
390 "required": ["location"]
391 }`),
392 },
393 },
394 }
395
396 resp, err := svc.Do(ctx, req)
397 if err != nil {
398 t.Fatalf("request failed: %v", err)
399 }
400
401 if resp.StopReason != llm.StopReasonToolUse {
402 t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
403 }
404
405 // Find the tool use content
406 var foundToolUse bool
407 for _, c := range resp.Content {
408 if c.Type == llm.ContentTypeToolUse {
409 foundToolUse = true
410 if c.ToolName != "get_weather" {
411 t.Errorf("expected tool name get_weather, got %s", c.ToolName)
412 }
413 }
414 }
415 if !foundToolUse {
416 t.Error("expected to find tool use in response")
417 }
418 })
419}
420
421// Test system content with all empty text (should return nil)
422func TestFromLLMSystemResponsesAllEmpty(t *testing.T) {
423 items := fromLLMSystemResponses([]llm.SystemContent{
424 {Text: ""},
425 {Text: ""},
426 {Text: ""},
427 })
428 if items != nil {
429 t.Errorf("fromLLMSystemResponses(all empty) = %v, expected nil", items)
430 }
431}
432
433func TestResponsesServiceDo(t *testing.T) {
434 // Create a mock Responses server
435 server := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
436 if r.URL.Path != "/responses" {
437 t.Errorf("Expected path /responses, got %s", r.URL.Path)
438 }
439 if r.Header.Get("Authorization") != "Bearer test-api-key" {
440 t.Errorf("Expected Authorization header, got %s", r.Header.Get("Authorization"))
441 }
442
443 // Send a mock response
444 response := responsesResponse{
445 ID: "responses-test123",
446 Model: "test-model",
447 Output: []responsesOutputItem{
448 {
449 Type: "message",
450 Role: "assistant",
451 Content: []responsesContent{
452 {
453 Type: "text",
454 Text: "Hello! How can I help you today?",
455 },
456 },
457 },
458 },
459 Usage: responsesUsage{
460 InputTokens: 10,
461 OutputTokens: 20,
462 },
463 }
464
465 w.Header().Set("Content-Type", "application/json")
466 json.NewEncoder(w).Encode(response)
467 }))
468 defer server.Close()
469
470 // Create a service with the mock server
471 ctx := context.Background()
472 svc := &ResponsesService{
473 APIKey: "test-api-key",
474 Model: GPT41,
475 ModelURL: server.URL,
476 }
477
478 // Create a test request
479 req := &llm.Request{
480 Messages: []llm.Message{
481 {
482 Role: llm.MessageRoleUser,
483 Content: []llm.Content{
484 {Type: llm.ContentTypeText, Text: "Hello!"},
485 },
486 },
487 },
488 }
489
490 // Call the Do method
491 resp, err := svc.Do(ctx, req)
492 if err != nil {
493 t.Fatalf("Do() error = %v", err)
494 }
495
496 // Verify the response
497 if resp == nil {
498 t.Fatal("Do() returned nil response")
499 }
500 if resp.Role != llm.MessageRoleAssistant {
501 t.Errorf("resp.Role = %v, expected %v", resp.Role, llm.MessageRoleAssistant)
502 }
503 if len(resp.Content) != 1 {
504 t.Errorf("resp.Content length = %d, expected 1", len(resp.Content))
505 } else {
506 content := resp.Content[0]
507 if content.Type != llm.ContentTypeText {
508 t.Errorf("content.Type = %v, expected %v", content.Type, llm.ContentTypeText)
509 }
510 if content.Text != "Hello! How can I help you today?" {
511 t.Errorf("content.Text = %q, expected %q", content.Text, "Hello! How can I help you today?")
512 }
513 }
514 if resp.Usage.InputTokens != 10 {
515 t.Errorf("resp.Usage.InputTokens = %d, expected 10", resp.Usage.InputTokens)
516 }
517 if resp.Usage.OutputTokens != 20 {
518 t.Errorf("resp.Usage.OutputTokens = %d, expected 20", resp.Usage.OutputTokens)
519 }
520}