1package oai
2
import (
	"bytes"
	"cmp"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math/rand/v2"
	"net/http"
	"strconv"
	"strings"
	"time"

	"shelley.exe.dev/llm"
)
19
// ResponsesService provides chat completions using the OpenAI Responses API.
// This API is required for models like gpt-5.1-codex.
// Fields should not be altered concurrently with calling any method on ResponsesService.
type ResponsesService struct {
	HTTPC         *http.Client      // HTTP client used for requests; defaults to http.DefaultClient if nil
	APIKey        string            // bearer token sent in the Authorization header; NOTE(review): original comment claims an env-var fallback, but no code in this file loads one — presumably done by the caller, confirm
	Model         Model             // defaults to DefaultModel if zero value
	ModelURL      string            // optional, overrides Model.URL as the API base URL
	MaxTokens     int               // request max_output_tokens; defaults to DefaultMaxTokens if zero
	Org           string            // optional organization ID, sent as the OpenAI-Organization header
	DumpLLM       bool              // whether to dump request/response text to files for debugging; defaults to false
	ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables reasoning)
}

// Compile-time check that ResponsesService implements llm.Service.
var _ llm.Service = (*ResponsesService)(nil)
35
36// Responses API request/response types
37
// responsesRequest is the JSON body POSTed to the /responses endpoint.
type responsesRequest struct {
	Model           string               `json:"model"`
	Input           []responsesInputItem `json:"input"`
	Tools           []responsesTool      `json:"tools,omitempty"`
	ToolChoice      any                  `json:"tool_choice,omitempty"`
	MaxOutputTokens int                  `json:"max_output_tokens,omitempty"`
	Reasoning       *responsesReasoning  `json:"reasoning,omitempty"` // nil when thinking is disabled
}
46
// responsesReasoning configures the model's reasoning effort for a request.
type responsesReasoning struct {
	Effort string `json:"effort,omitempty"` // "low", "medium", "high"
}
50
// responsesInputItem is one element of the request's "input" array.
// Type selects which of the remaining field groups is meaningful.
type responsesInputItem struct {
	Type      string             `json:"type"`                // "message", "function_call", "function_call_output"
	Role      string             `json:"role,omitempty"`      // for messages: "user", "assistant"
	Content   []responsesContent `json:"content,omitempty"`   // for messages
	CallID    string             `json:"call_id,omitempty"`   // for function_call and function_call_output
	Name      string             `json:"name,omitempty"`      // for function_call
	Arguments string             `json:"arguments,omitempty"` // for function_call: JSON-encoded argument object
	Output    string             `json:"output,omitempty"`    // for function_call_output
}
60
// responsesContent is a single text part within a message item.
type responsesContent struct {
	Type string `json:"type"` // "input_text" (user input) or "output_text" (assistant output)
	Text string `json:"text"`
}
65
// responsesTool describes one callable function tool offered to the model.
type responsesTool struct {
	Type        string          `json:"type"` // always "function" in this file
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Parameters  json.RawMessage `json:"parameters,omitempty"` // JSON Schema for the tool's arguments
}
72
// responsesResponse is the top-level JSON body returned by /responses.
type responsesResponse struct {
	ID        string                `json:"id"`
	Object    string                `json:"object"` // "response"
	CreatedAt int64                 `json:"created_at"`
	Status    string                `json:"status"` // "completed", "incomplete", etc.
	Model     string                `json:"model"`
	Output    []responsesOutputItem `json:"output"`
	Usage     responsesUsage        `json:"usage"`
	Error     *responsesError       `json:"error"` // non-nil when the API reports an in-band error
}
83
// responsesOutputItem is one element of the response's "output" array.
// As with responsesInputItem, Type selects which fields are populated.
type responsesOutputItem struct {
	ID        string             `json:"id"`
	Type      string             `json:"type"`                // "message", "reasoning", "function_call"
	Role      string             `json:"role,omitempty"`      // for messages: "assistant"
	Status    string             `json:"status,omitempty"`
	Content   []responsesContent `json:"content,omitempty"`   // for messages
	CallID    string             `json:"call_id,omitempty"`   // for function_call
	Name      string             `json:"name,omitempty"`      // for function_call
	Arguments string             `json:"arguments,omitempty"` // for function_call
	Summary   []string           `json:"summary,omitempty"`   // for reasoning; NOTE(review): the API documents summary entries as objects with a text field, not bare strings — verify decoding against a live response
}
95
// responsesUsage reports token accounting for a single response.
type responsesUsage struct {
	InputTokens         int                           `json:"input_tokens"`
	InputTokensDetails  *responsesInputTokensDetails  `json:"input_tokens_details,omitempty"`
	OutputTokens        int                           `json:"output_tokens"`
	OutputTokensDetails *responsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
	TotalTokens         int                           `json:"total_tokens"`
}
103
// responsesInputTokensDetails breaks down input tokens; CachedTokens is the
// portion of input_tokens served from the prompt cache.
type responsesInputTokensDetails struct {
	CachedTokens int `json:"cached_tokens"`
}
107
// responsesOutputTokensDetails breaks down output tokens; ReasoningTokens is
// the portion of output_tokens spent on reasoning.
type responsesOutputTokensDetails struct {
	ReasoningTokens int `json:"reasoning_tokens"`
}
111
// responsesError is the structured error object the API returns, either
// in-band (responsesResponse.Error) or as the body of a non-200 reply.
type responsesError struct {
	Message string `json:"message"`
	Type    string `json:"type"`
	Param   string `json:"param"`
	Code    string `json:"code"`
}
118
119// fromLLMMessageResponses converts llm.Message to Responses API input items
120func fromLLMMessageResponses(msg llm.Message) []responsesInputItem {
121 var items []responsesInputItem
122
123 // Separate tool results from regular content
124 var regularContent []llm.Content
125 var toolResults []llm.Content
126
127 for _, c := range msg.Content {
128 if c.Type == llm.ContentTypeToolResult {
129 toolResults = append(toolResults, c)
130 } else {
131 regularContent = append(regularContent, c)
132 }
133 }
134
135 // Process tool results first - they need to come before the assistant message
136 for _, tr := range toolResults {
137 // Collect all text from content objects
138 var texts []string
139 for _, result := range tr.ToolResult {
140 if strings.TrimSpace(result.Text) != "" {
141 texts = append(texts, result.Text)
142 }
143 }
144 toolResultContent := strings.Join(texts, "\n")
145
146 // Add error prefix if needed
147 if tr.ToolError {
148 if toolResultContent != "" {
149 toolResultContent = "error: " + toolResultContent
150 } else {
151 toolResultContent = "error: tool execution failed"
152 }
153 }
154
155 items = append(items, responsesInputItem{
156 Type: "function_call_output",
157 CallID: tr.ToolUseID,
158 Output: cmp.Or(toolResultContent, " "),
159 })
160 }
161
162 // Process regular content
163 if len(regularContent) > 0 {
164 var messageContent []responsesContent
165 var functionCalls []responsesInputItem
166
167 for _, c := range regularContent {
168 switch c.Type {
169 case llm.ContentTypeText:
170 if c.Text != "" {
171 contentType := "input_text"
172 if msg.Role == llm.MessageRoleAssistant {
173 contentType = "output_text"
174 }
175 messageContent = append(messageContent, responsesContent{
176 Type: contentType,
177 Text: c.Text,
178 })
179 }
180 case llm.ContentTypeToolUse:
181 // Tool use becomes a function_call in the input
182 functionCalls = append(functionCalls, responsesInputItem{
183 Type: "function_call",
184 CallID: c.ID,
185 Name: c.ToolName,
186 Arguments: string(c.ToolInput),
187 })
188 }
189 }
190
191 // Add message if it has content
192 if len(messageContent) > 0 {
193 role := "user"
194 if msg.Role == llm.MessageRoleAssistant {
195 role = "assistant"
196 }
197 items = append(items, responsesInputItem{
198 Type: "message",
199 Role: role,
200 Content: messageContent,
201 })
202 }
203
204 // Add function calls
205 items = append(items, functionCalls...)
206 }
207
208 return items
209}
210
211// fromLLMToolResponses converts llm.Tool to Responses API tool format
212func fromLLMToolResponses(t *llm.Tool) responsesTool {
213 return responsesTool{
214 Type: "function",
215 Name: t.Name,
216 Description: t.Description,
217 Parameters: t.InputSchema,
218 }
219}
220
221// fromLLMSystemResponses converts llm.SystemContent to Responses API input items
222func fromLLMSystemResponses(systemContent []llm.SystemContent) []responsesInputItem {
223 if len(systemContent) == 0 {
224 return nil
225 }
226
227 // Combine all system content into a single system message
228 var systemText string
229 for i, content := range systemContent {
230 if i > 0 && systemText != "" && content.Text != "" {
231 systemText += "\n"
232 }
233 systemText += content.Text
234 }
235
236 if systemText == "" {
237 return nil
238 }
239
240 return []responsesInputItem{
241 {
242 Type: "message",
243 Role: "user",
244 Content: []responsesContent{
245 {
246 Type: "input_text",
247 Text: systemText,
248 },
249 },
250 },
251 }
252}
253
254// toLLMResponseFromResponses converts Responses API response to llm.Response
255func (s *ResponsesService) toLLMResponseFromResponses(resp *responsesResponse, headers http.Header) *llm.Response {
256 if len(resp.Output) == 0 {
257 return &llm.Response{
258 ID: resp.ID,
259 Model: resp.Model,
260 Role: llm.MessageRoleAssistant,
261 Usage: s.toLLMUsageFromResponses(resp.Usage, headers),
262 }
263 }
264
265 // Process the output items
266 var contents []llm.Content
267 var stopReason llm.StopReason = llm.StopReasonStopSequence
268
269 for _, item := range resp.Output {
270 switch item.Type {
271 case "message":
272 // Convert message content
273 for _, c := range item.Content {
274 if c.Text != "" {
275 contents = append(contents, llm.Content{
276 Type: llm.ContentTypeText,
277 Text: c.Text,
278 })
279 }
280 }
281 case "reasoning":
282 // Convert reasoning to thinking content
283 if len(item.Summary) > 0 {
284 summaryText := strings.Join(item.Summary, "\n")
285 contents = append(contents, llm.Content{
286 Type: llm.ContentTypeThinking,
287 Text: summaryText,
288 })
289 }
290 case "function_call":
291 // Convert function call to tool use
292 contents = append(contents, llm.Content{
293 ID: item.CallID,
294 Type: llm.ContentTypeToolUse,
295 ToolName: item.Name,
296 ToolInput: json.RawMessage(item.Arguments),
297 })
298 stopReason = llm.StopReasonToolUse
299 }
300 }
301
302 // If no content, add empty text content
303 if len(contents) == 0 {
304 contents = append(contents, llm.Content{
305 Type: llm.ContentTypeText,
306 Text: "",
307 })
308 }
309
310 return &llm.Response{
311 ID: resp.ID,
312 Model: resp.Model,
313 Role: llm.MessageRoleAssistant,
314 Content: contents,
315 StopReason: stopReason,
316 Usage: s.toLLMUsageFromResponses(resp.Usage, headers),
317 }
318}
319
320// toLLMUsageFromResponses converts Responses API usage to llm.Usage
321func (s *ResponsesService) toLLMUsageFromResponses(usage responsesUsage, headers http.Header) llm.Usage {
322 in := uint64(usage.InputTokens)
323 var inc uint64
324 if usage.InputTokensDetails != nil {
325 inc = uint64(usage.InputTokensDetails.CachedTokens)
326 }
327 out := uint64(usage.OutputTokens)
328 u := llm.Usage{
329 InputTokens: in,
330 CacheReadInputTokens: inc,
331 CacheCreationInputTokens: in,
332 OutputTokens: out,
333 }
334 u.CostUSD = llm.CostUSDFromResponse(headers)
335 return u
336}
337
338// TokenContextWindow returns the maximum token context window size for this service
339func (s *ResponsesService) TokenContextWindow() int {
340 model := cmp.Or(s.Model, DefaultModel)
341
342 // Use the same context window logic as the regular service
343 switch model.ModelName {
344 case "gpt-5.3-codex":
345 return 288000 // 288k for gpt-5.3-codex
346 case "gpt-5.2-codex":
347 return 272000 // 272k for gpt-5.2-codex
348 case "gpt-5.1-codex":
349 return 256000 // 256k for gpt-5.1-codex
350 case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
351 return 200000
352 case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
353 return 128000
354 default:
355 return 128000
356 }
357}
358
359// MaxImageDimension returns the maximum allowed image dimension.
360// TODO: determine actual OpenAI image dimension limits
361func (s *ResponsesService) MaxImageDimension() int {
362 return 0 // No known limit
363}
364
365// Do sends a request to OpenAI using the Responses API.
366func (s *ResponsesService) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
367 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
368 model := cmp.Or(s.Model, DefaultModel)
369
370 // Start with system messages if provided
371 var allInput []responsesInputItem
372 if len(ir.System) > 0 {
373 sysItems := fromLLMSystemResponses(ir.System)
374 allInput = append(allInput, sysItems...)
375 }
376
377 // Add regular messages
378 for _, msg := range ir.Messages {
379 items := fromLLMMessageResponses(msg)
380 allInput = append(allInput, items...)
381 }
382
383 // Convert tools
384 var tools []responsesTool
385 for _, t := range ir.Tools {
386 tools = append(tools, fromLLMToolResponses(t))
387 }
388
389 // Create the request
390 req := responsesRequest{
391 Model: model.ModelName,
392 Input: allInput,
393 Tools: tools,
394 MaxOutputTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
395 }
396
397 // Add reasoning if thinking is enabled
398 if s.ThinkingLevel != llm.ThinkingLevelOff {
399 effort := s.ThinkingLevel.ThinkingEffort()
400 if effort != "" {
401 req.Reasoning = &responsesReasoning{Effort: effort}
402 }
403 }
404
405 // Add tool choice if specified
406 if ir.ToolChoice != nil {
407 req.ToolChoice = fromLLMToolChoice(ir.ToolChoice)
408 }
409
410 // Construct the full URL
411 baseURL := cmp.Or(s.ModelURL, model.URL, OpenAIURL)
412 fullURL := baseURL + "/responses"
413
414 // Marshal the request
415 reqJSON, err := json.Marshal(req)
416 if err != nil {
417 return nil, fmt.Errorf("failed to marshal request: %w", err)
418 }
419
420 // Dump request if enabled
421 if s.DumpLLM {
422 if reqJSONPretty, err := json.MarshalIndent(req, "", " "); err == nil {
423 if err := llm.DumpToFile("request", fullURL, reqJSONPretty); err != nil {
424 slog.WarnContext(ctx, "failed to dump responses request to file", "error", err)
425 }
426 }
427 }
428
429 // Retry mechanism
430 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
431
432 // retry loop
433 var errs error // accumulated errors across all attempts
434 for attempts := 0; ; attempts++ {
435 if attempts > 10 {
436 return nil, fmt.Errorf("responses request failed after %d attempts (url=%s, model=%s): %w", attempts, fullURL, model.ModelName, errs)
437 }
438 if attempts > 0 {
439 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
440 slog.WarnContext(ctx, "responses request sleep before retry", "sleep", sleep, "attempts", attempts)
441 time.Sleep(sleep)
442 }
443
444 // Create HTTP request
445 httpReq, err := http.NewRequestWithContext(ctx, "POST", fullURL, bytes.NewReader(reqJSON))
446 if err != nil {
447 return nil, fmt.Errorf("failed to create request: %w", err)
448 }
449
450 httpReq.Header.Set("Content-Type", "application/json")
451 httpReq.Header.Set("Authorization", "Bearer "+s.APIKey)
452 if s.Org != "" {
453 httpReq.Header.Set("OpenAI-Organization", s.Org)
454 }
455
456 // Send request
457 httpResp, err := httpc.Do(httpReq)
458 if err != nil {
459 errs = errors.Join(errs, fmt.Errorf("attempt %d: %w", attempts+1, err))
460 continue
461 }
462 defer httpResp.Body.Close()
463
464 // Read response body
465 body, err := io.ReadAll(httpResp.Body)
466 if err != nil {
467 return nil, fmt.Errorf("failed to read response body: %w", err)
468 }
469
470 // Handle non-200 responses
471 if httpResp.StatusCode != http.StatusOK {
472 var apiErr responsesError
473 if jsonErr := json.Unmarshal(body, &struct {
474 Error *responsesError `json:"error"`
475 }{Error: &apiErr}); jsonErr == nil && apiErr.Message != "" {
476 // We have a structured error
477 switch {
478 case httpResp.StatusCode >= 500:
479 // Server error, retry
480 slog.WarnContext(ctx, "responses_request_failed", "error", apiErr.Message, "status_code", httpResp.StatusCode, "url", fullURL, "model", model.ModelName)
481 errs = errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, apiErr.Message))
482 continue
483
484 case httpResp.StatusCode == 429:
485 // Rate limited, retry
486 slog.WarnContext(ctx, "responses_request_rate_limited", "error", apiErr.Message, "url", fullURL, "model", model.ModelName)
487 errs = errors.Join(errs, fmt.Errorf("status %d (rate limited, url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, apiErr.Message))
488 continue
489
490 case httpResp.StatusCode >= 400 && httpResp.StatusCode < 500:
491 // Client error, probably unrecoverable
492 slog.WarnContext(ctx, "responses_request_failed", "error", apiErr.Message, "status_code", httpResp.StatusCode, "url", fullURL, "model", model.ModelName)
493 return nil, errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, apiErr.Message))
494 }
495 }
496
497 // No structured error, use the raw body
498 slog.WarnContext(ctx, "responses_request_failed", "status_code", httpResp.StatusCode, "url", fullURL, "model", model.ModelName, "body", string(body))
499 return nil, fmt.Errorf("status %d (url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, string(body))
500 }
501
502 // Parse successful response
503 var resp responsesResponse
504 if err := json.Unmarshal(body, &resp); err != nil {
505 return nil, fmt.Errorf("failed to unmarshal response: %w", err)
506 }
507
508 // Check for errors in the response
509 if resp.Error != nil {
510 return nil, fmt.Errorf("response contains error: %s", resp.Error.Message)
511 }
512
513 // Dump response if enabled
514 if s.DumpLLM {
515 if respJSON, err := json.MarshalIndent(resp, "", " "); err == nil {
516 if err := llm.DumpToFile("response", "", respJSON); err != nil {
517 slog.WarnContext(ctx, "failed to dump responses response to file", "error", err)
518 }
519 }
520 }
521
522 return s.toLLMResponseFromResponses(&resp, httpResp.Header), nil
523 }
524}
525
526func (s *ResponsesService) UseSimplifiedPatch() bool {
527 return s.Model.UseSimplifiedPatch
528}
529
530// ConfigDetails returns configuration information for logging
531func (s *ResponsesService) ConfigDetails() map[string]string {
532 model := cmp.Or(s.Model, DefaultModel)
533 baseURL := cmp.Or(s.ModelURL, model.URL, OpenAIURL)
534 return map[string]string{
535 "base_url": baseURL,
536 "model_name": model.ModelName,
537 "full_url": baseURL + "/responses",
538 "api_key_env": model.APIKeyEnv,
539 "has_api_key_set": fmt.Sprintf("%v", s.APIKey != ""),
540 }
541}