oai_responses.go

  1package oai
  2
  3import (
  4	"bytes"
  5	"cmp"
  6	"context"
  7	"encoding/json"
  8	"errors"
  9	"fmt"
 10	"io"
 11	"log/slog"
 12	"math/rand/v2"
 13	"net/http"
 14	"strings"
 15	"time"
 16
 17	"shelley.exe.dev/llm"
 18)
 19
// ResponsesService provides chat completions using the OpenAI Responses API.
// This API is required for models like gpt-5.1-codex.
// Fields should not be altered concurrently with calling any method on ResponsesService.
type ResponsesService struct {
	HTTPC         *http.Client      // defaults to http.DefaultClient if nil
	APIKey        string            // optional, if not set will try to load from env var
	Model         Model             // defaults to DefaultModel if zero value
	ModelURL      string            // optional, overrides Model.URL
	MaxTokens     int               // defaults to DefaultMaxTokens if zero
	Org           string            // optional - organization ID
	DumpLLM       bool              // whether to dump request/response text to files for debugging; defaults to false
	ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables reasoning)
}

// Compile-time check that ResponsesService satisfies llm.Service.
var _ llm.Service = (*ResponsesService)(nil)
 35
// Responses API request/response types

// responsesRequest is the JSON body POSTed to the /responses endpoint.
type responsesRequest struct {
	Model           string               `json:"model"`
	Input           []responsesInputItem `json:"input"`
	Tools           []responsesTool      `json:"tools,omitempty"`
	ToolChoice      any                  `json:"tool_choice,omitempty"`
	MaxOutputTokens int                  `json:"max_output_tokens,omitempty"`
	Reasoning       *responsesReasoning  `json:"reasoning,omitempty"` // nil omits reasoning entirely
}

// responsesReasoning configures the model's reasoning effort for a request.
type responsesReasoning struct {
	Effort string `json:"effort,omitempty"` // "low", "medium", "high"
}
 50
// responsesInputItem is one element of the request's "input" array.
// Which fields are meaningful depends on Type; unused fields are
// omitted from the JSON encoding.
type responsesInputItem struct {
	Type      string             `json:"type"`                // "message", "function_call", "function_call_output"
	Role      string             `json:"role,omitempty"`      // for messages: "user", "assistant"
	Content   []responsesContent `json:"content,omitempty"`   // for messages
	CallID    string             `json:"call_id,omitempty"`   // for function_call and function_call_output
	Name      string             `json:"name,omitempty"`      // for function_call
	Arguments string             `json:"arguments,omitempty"` // for function_call
	Output    string             `json:"output,omitempty"`    // for function_call_output
}

// responsesContent is a single text part within a message item.
type responsesContent struct {
	Type string `json:"type"` // "input_text", "output_text"
	Text string `json:"text"`
}

// responsesTool describes one function tool offered to the model.
type responsesTool struct {
	Type        string          `json:"type"` // "function"
	Name        string          `json:"name"`
	Description string          `json:"description,omitempty"`
	Parameters  json.RawMessage `json:"parameters,omitempty"` // tool input schema, passed through verbatim
}
 72
// responsesResponse is the top-level JSON body returned by /responses.
type responsesResponse struct {
	ID        string                `json:"id"`
	Object    string                `json:"object"` // "response"
	CreatedAt int64                 `json:"created_at"`
	Status    string                `json:"status"` // "completed", "incomplete", etc.
	Model     string                `json:"model"`
	Output    []responsesOutputItem `json:"output"`
	Usage     responsesUsage        `json:"usage"`
	Error     *responsesError       `json:"error"` // non-nil when the API embeds an error
}

// responsesOutputItem is one element of the response's "output" array.
// As with responsesInputItem, the meaningful fields depend on Type.
type responsesOutputItem struct {
	ID        string             `json:"id"`
	Type      string             `json:"type"`           // "message", "reasoning", "function_call"
	Role      string             `json:"role,omitempty"` // for messages: "assistant"
	Status    string             `json:"status,omitempty"`
	Content   []responsesContent `json:"content,omitempty"`   // for messages
	CallID    string             `json:"call_id,omitempty"`   // for function_call
	Name      string             `json:"name,omitempty"`      // for function_call
	Arguments string             `json:"arguments,omitempty"` // for function_call
	Summary   []string           `json:"summary,omitempty"`   // for reasoning
}

// responsesUsage reports token accounting for a single response.
type responsesUsage struct {
	InputTokens         int                           `json:"input_tokens"`
	InputTokensDetails  *responsesInputTokensDetails  `json:"input_tokens_details,omitempty"`
	OutputTokens        int                           `json:"output_tokens"`
	OutputTokensDetails *responsesOutputTokensDetails `json:"output_tokens_details,omitempty"`
	TotalTokens         int                           `json:"total_tokens"`
}

// responsesInputTokensDetails breaks down the input token count.
type responsesInputTokensDetails struct {
	CachedTokens int `json:"cached_tokens"` // input tokens served from the prompt cache
}

// responsesOutputTokensDetails breaks down the output token count.
type responsesOutputTokensDetails struct {
	ReasoningTokens int `json:"reasoning_tokens"` // tokens spent on internal reasoning
}

// responsesError is the structured error object embedded in API responses.
type responsesError struct {
	Message string `json:"message"`
	Type    string `json:"type"`
	Param   string `json:"param"`
	Code    string `json:"code"`
}
118
119// fromLLMMessageResponses converts llm.Message to Responses API input items
120func fromLLMMessageResponses(msg llm.Message) []responsesInputItem {
121	var items []responsesInputItem
122
123	// Separate tool results from regular content
124	var regularContent []llm.Content
125	var toolResults []llm.Content
126
127	for _, c := range msg.Content {
128		if c.Type == llm.ContentTypeToolResult {
129			toolResults = append(toolResults, c)
130		} else {
131			regularContent = append(regularContent, c)
132		}
133	}
134
135	// Process tool results first - they need to come before the assistant message
136	for _, tr := range toolResults {
137		// Collect all text from content objects
138		var texts []string
139		for _, result := range tr.ToolResult {
140			if strings.TrimSpace(result.Text) != "" {
141				texts = append(texts, result.Text)
142			}
143		}
144		toolResultContent := strings.Join(texts, "\n")
145
146		// Add error prefix if needed
147		if tr.ToolError {
148			if toolResultContent != "" {
149				toolResultContent = "error: " + toolResultContent
150			} else {
151				toolResultContent = "error: tool execution failed"
152			}
153		}
154
155		items = append(items, responsesInputItem{
156			Type:   "function_call_output",
157			CallID: tr.ToolUseID,
158			Output: cmp.Or(toolResultContent, " "),
159		})
160	}
161
162	// Process regular content
163	if len(regularContent) > 0 {
164		var messageContent []responsesContent
165		var functionCalls []responsesInputItem
166
167		for _, c := range regularContent {
168			switch c.Type {
169			case llm.ContentTypeText:
170				if c.Text != "" {
171					contentType := "input_text"
172					if msg.Role == llm.MessageRoleAssistant {
173						contentType = "output_text"
174					}
175					messageContent = append(messageContent, responsesContent{
176						Type: contentType,
177						Text: c.Text,
178					})
179				}
180			case llm.ContentTypeToolUse:
181				// Tool use becomes a function_call in the input
182				functionCalls = append(functionCalls, responsesInputItem{
183					Type:      "function_call",
184					CallID:    c.ID,
185					Name:      c.ToolName,
186					Arguments: string(c.ToolInput),
187				})
188			}
189		}
190
191		// Add message if it has content
192		if len(messageContent) > 0 {
193			role := "user"
194			if msg.Role == llm.MessageRoleAssistant {
195				role = "assistant"
196			}
197			items = append(items, responsesInputItem{
198				Type:    "message",
199				Role:    role,
200				Content: messageContent,
201			})
202		}
203
204		// Add function calls
205		items = append(items, functionCalls...)
206	}
207
208	return items
209}
210
211// fromLLMToolResponses converts llm.Tool to Responses API tool format
212func fromLLMToolResponses(t *llm.Tool) responsesTool {
213	return responsesTool{
214		Type:        "function",
215		Name:        t.Name,
216		Description: t.Description,
217		Parameters:  t.InputSchema,
218	}
219}
220
221// fromLLMSystemResponses converts llm.SystemContent to Responses API input items
222func fromLLMSystemResponses(systemContent []llm.SystemContent) []responsesInputItem {
223	if len(systemContent) == 0 {
224		return nil
225	}
226
227	// Combine all system content into a single system message
228	var systemText string
229	for i, content := range systemContent {
230		if i > 0 && systemText != "" && content.Text != "" {
231			systemText += "\n"
232		}
233		systemText += content.Text
234	}
235
236	if systemText == "" {
237		return nil
238	}
239
240	return []responsesInputItem{
241		{
242			Type: "message",
243			Role: "user",
244			Content: []responsesContent{
245				{
246					Type: "input_text",
247					Text: systemText,
248				},
249			},
250		},
251	}
252}
253
254// toLLMResponseFromResponses converts Responses API response to llm.Response
255func (s *ResponsesService) toLLMResponseFromResponses(resp *responsesResponse, headers http.Header) *llm.Response {
256	if len(resp.Output) == 0 {
257		return &llm.Response{
258			ID:    resp.ID,
259			Model: resp.Model,
260			Role:  llm.MessageRoleAssistant,
261			Usage: s.toLLMUsageFromResponses(resp.Usage, headers),
262		}
263	}
264
265	// Process the output items
266	var contents []llm.Content
267	var stopReason llm.StopReason = llm.StopReasonStopSequence
268
269	for _, item := range resp.Output {
270		switch item.Type {
271		case "message":
272			// Convert message content
273			for _, c := range item.Content {
274				if c.Text != "" {
275					contents = append(contents, llm.Content{
276						Type: llm.ContentTypeText,
277						Text: c.Text,
278					})
279				}
280			}
281		case "reasoning":
282			// Convert reasoning to thinking content
283			if len(item.Summary) > 0 {
284				summaryText := strings.Join(item.Summary, "\n")
285				contents = append(contents, llm.Content{
286					Type: llm.ContentTypeThinking,
287					Text: summaryText,
288				})
289			}
290		case "function_call":
291			// Convert function call to tool use
292			contents = append(contents, llm.Content{
293				ID:        item.CallID,
294				Type:      llm.ContentTypeToolUse,
295				ToolName:  item.Name,
296				ToolInput: json.RawMessage(item.Arguments),
297			})
298			stopReason = llm.StopReasonToolUse
299		}
300	}
301
302	// If no content, add empty text content
303	if len(contents) == 0 {
304		contents = append(contents, llm.Content{
305			Type: llm.ContentTypeText,
306			Text: "",
307		})
308	}
309
310	return &llm.Response{
311		ID:         resp.ID,
312		Model:      resp.Model,
313		Role:       llm.MessageRoleAssistant,
314		Content:    contents,
315		StopReason: stopReason,
316		Usage:      s.toLLMUsageFromResponses(resp.Usage, headers),
317	}
318}
319
// toLLMUsageFromResponses converts Responses API usage to llm.Usage.
// Cost is derived from the response headers rather than from token counts.
func (s *ResponsesService) toLLMUsageFromResponses(usage responsesUsage, headers http.Header) llm.Usage {
	in := uint64(usage.InputTokens)
	// Tokens served from the prompt cache, when the API reports them.
	var inc uint64
	if usage.InputTokensDetails != nil {
		inc = uint64(usage.InputTokensDetails.CachedTokens)
	}
	out := uint64(usage.OutputTokens)
	// NOTE(review): CacheCreationInputTokens is set to the full input token
	// count, not a cache-write figure from the API (the Responses API usage
	// payload has no such field). Confirm this is the intended accounting
	// convention for llm.Usage rather than a copy/paste slip.
	u := llm.Usage{
		InputTokens:              in,
		CacheReadInputTokens:     inc,
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = llm.CostUSDFromResponse(headers)
	return u
}
337
338// TokenContextWindow returns the maximum token context window size for this service
339func (s *ResponsesService) TokenContextWindow() int {
340	model := cmp.Or(s.Model, DefaultModel)
341
342	// Use the same context window logic as the regular service
343	switch model.ModelName {
344	case "gpt-5.3-codex":
345		return 288000 // 288k for gpt-5.3-codex
346	case "gpt-5.2-codex":
347		return 272000 // 272k for gpt-5.2-codex
348	case "gpt-5.1-codex":
349		return 256000 // 256k for gpt-5.1-codex
350	case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
351		return 200000
352	case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
353		return 128000
354	default:
355		return 128000
356	}
357}
358
// MaxImageDimension returns the maximum allowed image dimension.
// A return value of 0 signals that no limit is known.
// TODO: determine actual OpenAI image dimension limits
func (s *ResponsesService) MaxImageDimension() int {
	return 0 // No known limit
}
364
365// Do sends a request to OpenAI using the Responses API.
366func (s *ResponsesService) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
367	httpc := cmp.Or(s.HTTPC, http.DefaultClient)
368	model := cmp.Or(s.Model, DefaultModel)
369
370	// Start with system messages if provided
371	var allInput []responsesInputItem
372	if len(ir.System) > 0 {
373		sysItems := fromLLMSystemResponses(ir.System)
374		allInput = append(allInput, sysItems...)
375	}
376
377	// Add regular messages
378	for _, msg := range ir.Messages {
379		items := fromLLMMessageResponses(msg)
380		allInput = append(allInput, items...)
381	}
382
383	// Convert tools
384	var tools []responsesTool
385	for _, t := range ir.Tools {
386		tools = append(tools, fromLLMToolResponses(t))
387	}
388
389	// Create the request
390	req := responsesRequest{
391		Model:           model.ModelName,
392		Input:           allInput,
393		Tools:           tools,
394		MaxOutputTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
395	}
396
397	// Add reasoning if thinking is enabled
398	if s.ThinkingLevel != llm.ThinkingLevelOff {
399		effort := s.ThinkingLevel.ThinkingEffort()
400		if effort != "" {
401			req.Reasoning = &responsesReasoning{Effort: effort}
402		}
403	}
404
405	// Add tool choice if specified
406	if ir.ToolChoice != nil {
407		req.ToolChoice = fromLLMToolChoice(ir.ToolChoice)
408	}
409
410	// Construct the full URL
411	baseURL := cmp.Or(s.ModelURL, model.URL, OpenAIURL)
412	fullURL := baseURL + "/responses"
413
414	// Marshal the request
415	reqJSON, err := json.Marshal(req)
416	if err != nil {
417		return nil, fmt.Errorf("failed to marshal request: %w", err)
418	}
419
420	// Dump request if enabled
421	if s.DumpLLM {
422		if reqJSONPretty, err := json.MarshalIndent(req, "", "  "); err == nil {
423			if err := llm.DumpToFile("request", fullURL, reqJSONPretty); err != nil {
424				slog.WarnContext(ctx, "failed to dump responses request to file", "error", err)
425			}
426		}
427	}
428
429	// Retry mechanism
430	backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
431
432	// retry loop
433	var errs error // accumulated errors across all attempts
434	for attempts := 0; ; attempts++ {
435		if attempts > 10 {
436			return nil, fmt.Errorf("responses request failed after %d attempts (url=%s, model=%s): %w", attempts, fullURL, model.ModelName, errs)
437		}
438		if attempts > 0 {
439			sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
440			slog.WarnContext(ctx, "responses request sleep before retry", "sleep", sleep, "attempts", attempts)
441			time.Sleep(sleep)
442		}
443
444		// Create HTTP request
445		httpReq, err := http.NewRequestWithContext(ctx, "POST", fullURL, bytes.NewReader(reqJSON))
446		if err != nil {
447			return nil, fmt.Errorf("failed to create request: %w", err)
448		}
449
450		httpReq.Header.Set("Content-Type", "application/json")
451		httpReq.Header.Set("Authorization", "Bearer "+s.APIKey)
452		if s.Org != "" {
453			httpReq.Header.Set("OpenAI-Organization", s.Org)
454		}
455
456		// Send request
457		httpResp, err := httpc.Do(httpReq)
458		if err != nil {
459			errs = errors.Join(errs, fmt.Errorf("attempt %d: %w", attempts+1, err))
460			continue
461		}
462		defer httpResp.Body.Close()
463
464		// Read response body
465		body, err := io.ReadAll(httpResp.Body)
466		if err != nil {
467			return nil, fmt.Errorf("failed to read response body: %w", err)
468		}
469
470		// Handle non-200 responses
471		if httpResp.StatusCode != http.StatusOK {
472			var apiErr responsesError
473			if jsonErr := json.Unmarshal(body, &struct {
474				Error *responsesError `json:"error"`
475			}{Error: &apiErr}); jsonErr == nil && apiErr.Message != "" {
476				// We have a structured error
477				switch {
478				case httpResp.StatusCode >= 500:
479					// Server error, retry
480					slog.WarnContext(ctx, "responses_request_failed", "error", apiErr.Message, "status_code", httpResp.StatusCode, "url", fullURL, "model", model.ModelName)
481					errs = errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, apiErr.Message))
482					continue
483
484				case httpResp.StatusCode == 429:
485					// Rate limited, retry
486					slog.WarnContext(ctx, "responses_request_rate_limited", "error", apiErr.Message, "url", fullURL, "model", model.ModelName)
487					errs = errors.Join(errs, fmt.Errorf("status %d (rate limited, url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, apiErr.Message))
488					continue
489
490				case httpResp.StatusCode >= 400 && httpResp.StatusCode < 500:
491					// Client error, probably unrecoverable
492					slog.WarnContext(ctx, "responses_request_failed", "error", apiErr.Message, "status_code", httpResp.StatusCode, "url", fullURL, "model", model.ModelName)
493					return nil, errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, apiErr.Message))
494				}
495			}
496
497			// No structured error, use the raw body
498			slog.WarnContext(ctx, "responses_request_failed", "status_code", httpResp.StatusCode, "url", fullURL, "model", model.ModelName, "body", string(body))
499			return nil, fmt.Errorf("status %d (url=%s, model=%s): %s", httpResp.StatusCode, fullURL, model.ModelName, string(body))
500		}
501
502		// Parse successful response
503		var resp responsesResponse
504		if err := json.Unmarshal(body, &resp); err != nil {
505			return nil, fmt.Errorf("failed to unmarshal response: %w", err)
506		}
507
508		// Check for errors in the response
509		if resp.Error != nil {
510			return nil, fmt.Errorf("response contains error: %s", resp.Error.Message)
511		}
512
513		// Dump response if enabled
514		if s.DumpLLM {
515			if respJSON, err := json.MarshalIndent(resp, "", "  "); err == nil {
516				if err := llm.DumpToFile("response", "", respJSON); err != nil {
517					slog.WarnContext(ctx, "failed to dump responses response to file", "error", err)
518				}
519			}
520		}
521
522		return s.toLLMResponseFromResponses(&resp, httpResp.Header), nil
523	}
524}
525
526func (s *ResponsesService) UseSimplifiedPatch() bool {
527	return s.Model.UseSimplifiedPatch
528}
529
530// ConfigDetails returns configuration information for logging
531func (s *ResponsesService) ConfigDetails() map[string]string {
532	model := cmp.Or(s.Model, DefaultModel)
533	baseURL := cmp.Or(s.ModelURL, model.URL, OpenAIURL)
534	return map[string]string{
535		"base_url":        baseURL,
536		"model_name":      model.ModelName,
537		"full_url":        baseURL + "/responses",
538		"api_key_env":     model.APIKeyEnv,
539		"has_api_key_set": fmt.Sprintf("%v", s.APIKey != ""),
540	}
541}