Add Responses provider chaining metadata

Created by Michael Suchacz

Change summary

providers/openai/responses_language_model.go | 145 +++++++++++++++++----
providers/openai/responses_options.go        |  66 +++++++--
2 files changed, 170 insertions(+), 41 deletions(-)

Detailed changes

providers/openai/responses_language_model.go 🔗

@@ -4,6 +4,7 @@ import (
 	"context"
 	"encoding/base64"
 	"encoding/json"
+	"errors"
 	"fmt"
 	"reflect"
 	"strings"
@@ -28,8 +29,7 @@ type responsesLanguageModel struct {
 	noDefaultUserAgent bool
 }
 
-// newResponsesLanguageModel implements a responses api model
-// INFO: (kujtim) currently we do not support stored parameter we default it to false.
+// newResponsesLanguageModel implements a responses api model.
 func newResponsesLanguageModel(modelID string, provider string, client openai.Client, objectMode fantasy.ObjectMode, noDefaultUserAgent bool) responsesLanguageModel {
 	return responsesLanguageModel{
 		modelID:            modelID,
@@ -121,11 +121,11 @@ func getResponsesModelConfig(modelID string) responsesModelConfig {
 	}
 }
 
-func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.ResponseNewParams, []fantasy.CallWarning) {
+const previousResponseIDHistoryError = "cannot combine previous_response_id with replayed conversation history; use either previous_response_id (server-side chaining) or explicit message replay, not both"
+
+func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.ResponseNewParams, []fantasy.CallWarning, error) {
 	var warnings []fantasy.CallWarning
-	params := &responses.ResponseNewParams{
-		Store: param.NewOpt(false),
-	}
+	params := &responses.ResponseNewParams{}
 
 	modelConfig := getResponsesModelConfig(o.modelID)
 
@@ -157,6 +157,19 @@ func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.Res
 		}
 	}
 
+	if openaiOptions != nil && openaiOptions.Store != nil {
+		params.Store = param.NewOpt(*openaiOptions.Store)
+	} else {
+		params.Store = param.NewOpt(false)
+	}
+
+	if openaiOptions != nil && openaiOptions.PreviousResponseID != nil {
+		if err := validatePreviousResponseIDPrompt(call.Prompt); err != nil {
+			return nil, warnings, err
+		}
+		params.PreviousResponseID = param.NewOpt(*openaiOptions.PreviousResponseID)
+	}
+
 	input, inputWarnings := toResponsesPrompt(call.Prompt, modelConfig.systemMessageMode)
 	warnings = append(warnings, inputWarnings...)
 
@@ -328,7 +341,28 @@ func (o responsesLanguageModel) prepareParams(call fantasy.Call) (*responses.Res
 		params.ToolChoice = toolChoice
 	}
 
-	return params, warnings
+	return params, warnings, nil
+}
+
+func validatePreviousResponseIDPrompt(prompt fantasy.Prompt) error {
+	for _, msg := range prompt {
+		if msg.Role == fantasy.MessageRoleAssistant {
+			return errors.New(previousResponseIDHistoryError)
+		}
+	}
+	return nil
+}
+
+func responsesProviderMetadata(responseID string) fantasy.ProviderMetadata {
+	if responseID == "" {
+		return fantasy.ProviderMetadata{}
+	}
+
+	return fantasy.ProviderMetadata{
+		Name: &ResponsesProviderMetadata{
+			ResponseID: responseID,
+		},
+	}
 }
 
 func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string) (responses.ResponseInputParam, []fantasy.CallWarning) {
@@ -512,7 +546,7 @@ func toResponsesPrompt(prompt fantasy.Prompt, systemMessageMode string) (respons
 						continue
 					}
 					// we want to always send an empty array
-					summary := []responses.ResponseReasoningItemSummaryParam{}
+					summary := make([]responses.ResponseReasoningItemSummaryParam, 0, len(reasoningMetadata.Summary))
 					for _, s := range reasoningMetadata.Summary {
 						summary = append(summary, responses.ResponseReasoningItemSummaryParam{
 							Type: "summary_text",
@@ -695,7 +729,11 @@ func toResponsesTools(tools []fantasy.Tool, toolChoice *fantasy.ToolChoice, opti
 }
 
 func (o responsesLanguageModel) Generate(ctx context.Context, call fantasy.Call) (*fantasy.Response, error) {
-	params, warnings := o.prepareParams(call)
+	params, warnings, err := o.prepareParams(call)
+	if err != nil {
+		return nil, err
+	}
+
 	response, err := o.client.Responses.New(ctx, *params, callUARequestOptions(call, o.noDefaultUserAgent)...)
 	if err != nil {
 		return nil, toProviderErr(err)
@@ -831,7 +869,7 @@ func (o responsesLanguageModel) Generate(ctx context.Context, call fantasy.Call)
 		Content:          content,
 		Usage:            usage,
 		FinishReason:     finishReason,
-		ProviderMetadata: fantasy.ProviderMetadata{},
+		ProviderMetadata: responsesProviderMetadata(response.ID),
 		Warnings:         warnings,
 	}, nil
 }
@@ -854,12 +892,16 @@ func mapResponsesFinishReason(reason string, hasFunctionCall bool) fantasy.Finis
 }
 
 func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (fantasy.StreamResponse, error) {
-	params, warnings := o.prepareParams(call)
+	params, warnings, err := o.prepareParams(call)
+	if err != nil {
+		return nil, err
+	}
 
 	stream := o.client.Responses.NewStreaming(ctx, *params, callUARequestOptions(call, o.noDefaultUserAgent)...)
 
 	finishReason := fantasy.FinishReasonUnknown
 	var usage fantasy.Usage
+	responseID := ""
 	ongoingToolCalls := make(map[int64]*ongoingToolCall)
 	hasFunctionCall := false
 	activeReasoning := make(map[string]*reasoningState)
@@ -879,7 +921,8 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 
 			switch event.Type {
 			case "response.created":
-				_ = event.AsResponseCreated()
+				created := event.AsResponseCreated()
+				responseID = created.Response.ID
 
 			case "response.output_item.added":
 				added := event.AsResponseOutputItemAdded()
@@ -1080,8 +1123,9 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 					}
 				}
 
-			case "response.completed", "response.incomplete":
+			case "response.completed":
 				completed := event.AsResponseCompleted()
+				responseID = completed.Response.ID
 				finishReason = mapResponsesFinishReason(completed.Response.IncompleteDetails.Reason, hasFunctionCall)
 				usage = fantasy.Usage{
 					InputTokens:  completed.Response.Usage.InputTokens,
@@ -1095,6 +1139,22 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 					usage.CacheReadTokens = completed.Response.Usage.InputTokensDetails.CachedTokens
 				}
 
+			case "response.incomplete":
+				incomplete := event.AsResponseIncomplete()
+				responseID = incomplete.Response.ID
+				finishReason = mapResponsesFinishReason(incomplete.Response.IncompleteDetails.Reason, hasFunctionCall)
+				usage = fantasy.Usage{
+					InputTokens:  incomplete.Response.Usage.InputTokens,
+					OutputTokens: incomplete.Response.Usage.OutputTokens,
+					TotalTokens:  incomplete.Response.Usage.InputTokens + incomplete.Response.Usage.OutputTokens,
+				}
+				if incomplete.Response.Usage.OutputTokensDetails.ReasoningTokens != 0 {
+					usage.ReasoningTokens = incomplete.Response.Usage.OutputTokensDetails.ReasoningTokens
+				}
+				if incomplete.Response.Usage.InputTokensDetails.CachedTokens != 0 {
+					usage.CacheReadTokens = incomplete.Response.Usage.InputTokensDetails.CachedTokens
+				}
+
 			case "error":
 				errorEvent := event.AsError()
 				if !yield(fantasy.StreamPart{
@@ -1117,9 +1177,10 @@ func (o responsesLanguageModel) Stream(ctx context.Context, call fantasy.Call) (
 		}
 
 		yield(fantasy.StreamPart{
-			Type:         fantasy.StreamPartTypeFinish,
-			Usage:        usage,
-			FinishReason: finishReason,
+			Type:             fantasy.StreamPartTypeFinish,
+			Usage:            usage,
+			FinishReason:     finishReason,
+			ProviderMetadata: responsesProviderMetadata(responseID),
 		})
 	}, nil
 }
@@ -1247,7 +1308,10 @@ func (o responsesLanguageModel) generateObjectWithJSONMode(ctx context.Context,
 		ProviderOptions:  call.ProviderOptions,
 	}
 
-	params, warnings := o.prepareParams(fantasyCall)
+	params, warnings, err := o.prepareParams(fantasyCall)
+	if err != nil {
+		return nil, err
+	}
 
 	// Add structured output via Text.Format field
 	params.Text = responses.ResponseTextConfigParam{
@@ -1327,11 +1391,12 @@ func (o responsesLanguageModel) generateObjectWithJSONMode(ctx context.Context,
 	}
 
 	return &fantasy.ObjectResponse{
-		Object:       obj,
-		RawText:      jsonText,
-		Usage:        usage,
-		FinishReason: finishReason,
-		Warnings:     warnings,
+		Object:           obj,
+		RawText:          jsonText,
+		Usage:            usage,
+		FinishReason:     finishReason,
+		Warnings:         warnings,
+		ProviderMetadata: responsesProviderMetadata(response.ID),
 	}, nil
 }
 
@@ -1358,7 +1423,10 @@ func (o responsesLanguageModel) streamObjectWithJSONMode(ctx context.Context, ca
 		ProviderOptions:  call.ProviderOptions,
 	}
 
-	params, warnings := o.prepareParams(fantasyCall)
+	params, warnings, err := o.prepareParams(fantasyCall)
+	if err != nil {
+		return nil, err
+	}
 
 	// Add structured output via Text.Format field
 	params.Text = responses.ResponseTextConfigParam{
@@ -1381,6 +1449,7 @@ func (o responsesLanguageModel) streamObjectWithJSONMode(ctx context.Context, ca
 		var lastParsedObject any
 		var usage fantasy.Usage
 		var finishReason fantasy.FinishReason
+		var responseID string
 		var streamErr error
 		hasFunctionCall := false
 
@@ -1388,6 +1457,10 @@ func (o responsesLanguageModel) streamObjectWithJSONMode(ctx context.Context, ca
 			event := stream.Current()
 
 			switch event.Type {
+			case "response.created":
+				created := event.AsResponseCreated()
+				responseID = created.Response.ID
+
 			case "response.output_text.delta":
 				textDelta := event.AsResponseOutputTextDelta()
 				accumulated += textDelta.Delta
@@ -1431,8 +1504,9 @@ func (o responsesLanguageModel) streamObjectWithJSONMode(ctx context.Context, ca
 					}
 				}
 
-			case "response.completed", "response.incomplete":
+			case "response.completed":
 				completed := event.AsResponseCompleted()
+				responseID = completed.Response.ID
 				finishReason = mapResponsesFinishReason(completed.Response.IncompleteDetails.Reason, hasFunctionCall)
 				usage = fantasy.Usage{
 					InputTokens:  completed.Response.Usage.InputTokens,
@@ -1446,6 +1520,22 @@ func (o responsesLanguageModel) streamObjectWithJSONMode(ctx context.Context, ca
 					usage.CacheReadTokens = completed.Response.Usage.InputTokensDetails.CachedTokens
 				}
 
+			case "response.incomplete":
+				incomplete := event.AsResponseIncomplete()
+				responseID = incomplete.Response.ID
+				finishReason = mapResponsesFinishReason(incomplete.Response.IncompleteDetails.Reason, hasFunctionCall)
+				usage = fantasy.Usage{
+					InputTokens:  incomplete.Response.Usage.InputTokens,
+					OutputTokens: incomplete.Response.Usage.OutputTokens,
+					TotalTokens:  incomplete.Response.Usage.InputTokens + incomplete.Response.Usage.OutputTokens,
+				}
+				if incomplete.Response.Usage.OutputTokensDetails.ReasoningTokens != 0 {
+					usage.ReasoningTokens = incomplete.Response.Usage.OutputTokensDetails.ReasoningTokens
+				}
+				if incomplete.Response.Usage.InputTokensDetails.CachedTokens != 0 {
+					usage.CacheReadTokens = incomplete.Response.Usage.InputTokensDetails.CachedTokens
+				}
+
 			case "error":
 				errorEvent := event.AsError()
 				streamErr = fmt.Errorf("response error: %s (code: %s)", errorEvent.Message, errorEvent.Code)
@@ -1471,9 +1561,10 @@ func (o responsesLanguageModel) streamObjectWithJSONMode(ctx context.Context, ca
 		// Final validation and emit
 		if streamErr == nil && lastParsedObject != nil {
 			yield(fantasy.ObjectStreamPart{
-				Type:         fantasy.ObjectStreamPartTypeFinish,
-				Usage:        usage,
-				FinishReason: finishReason,
+				Type:             fantasy.ObjectStreamPartTypeFinish,
+				Usage:            usage,
+				FinishReason:     finishReason,
+				ProviderMetadata: responsesProviderMetadata(responseID),
 			})
 		} else if streamErr == nil && lastParsedObject == nil {
 			// No object was generated

providers/openai/responses_options.go 🔗

@@ -10,6 +10,7 @@ import (
 
 // Global type identifiers for OpenAI Responses API-specific data.
 const (
+	TypeResponsesProviderMetadata  = Name + ".responses.metadata"
 	TypeResponsesProviderOptions   = Name + ".responses.options"
 	TypeResponsesReasoningMetadata = Name + ".responses.reasoning_metadata"
 	TypeWebSearchCallMetadata      = Name + ".responses.web_search_call_metadata"
@@ -17,6 +18,13 @@ const (
 
 // Register OpenAI Responses API-specific types with the global registry.
 func init() {
+	fantasy.RegisterProviderType(TypeResponsesProviderMetadata, func(data []byte) (fantasy.ProviderOptionsData, error) {
+		var v ResponsesProviderMetadata
+		if err := json.Unmarshal(data, &v); err != nil {
+			return nil, err
+		}
+		return &v, nil
+	})
 	fantasy.RegisterProviderType(TypeResponsesProviderOptions, func(data []byte) (fantasy.ProviderOptionsData, error) {
 		var v ResponsesProviderOptions
 		if err := json.Unmarshal(data, &v); err != nil {
@@ -40,6 +48,34 @@ func init() {
 	})
 }
 
+// ResponsesProviderMetadata contains response-level metadata from the OpenAI Responses API.
+// The ResponseID can be used as PreviousResponseID in follow-up requests to chain responses.
+type ResponsesProviderMetadata struct {
+	ResponseID string `json:"response_id"`
+}
+
+var _ fantasy.ProviderOptionsData = (*ResponsesProviderMetadata)(nil)
+
+// Options implements the ProviderOptions interface.
+func (*ResponsesProviderMetadata) Options() {}
+
+// MarshalJSON implements custom JSON marshaling with type info for ResponsesProviderMetadata.
+func (m ResponsesProviderMetadata) MarshalJSON() ([]byte, error) {
+	type plain ResponsesProviderMetadata
+	return fantasy.MarshalProviderType(TypeResponsesProviderMetadata, plain(m))
+}
+
+// UnmarshalJSON implements custom JSON unmarshaling with type info for ResponsesProviderMetadata.
+func (m *ResponsesProviderMetadata) UnmarshalJSON(data []byte) error {
+	type plain ResponsesProviderMetadata
+	var p plain
+	if err := fantasy.UnmarshalProviderType(data, &p); err != nil {
+		return err
+	}
+	*m = ResponsesProviderMetadata(p)
+	return nil
+}
+
 // ResponsesReasoningMetadata represents reasoning metadata for OpenAI Responses API.
 type ResponsesReasoningMetadata struct {
 	ItemID           string   `json:"item_id"`
@@ -105,20 +141,22 @@ const (
 
 // ResponsesProviderOptions represents additional options for OpenAI Responses API.
 type ResponsesProviderOptions struct {
-	Include           []IncludeType    `json:"include"`
-	Instructions      *string          `json:"instructions"`
-	Logprobs          any              `json:"logprobs"`
-	MaxToolCalls      *int64           `json:"max_tool_calls"`
-	Metadata          map[string]any   `json:"metadata"`
-	ParallelToolCalls *bool            `json:"parallel_tool_calls"`
-	PromptCacheKey    *string          `json:"prompt_cache_key"`
-	ReasoningEffort   *ReasoningEffort `json:"reasoning_effort"`
-	ReasoningSummary  *string          `json:"reasoning_summary"`
-	SafetyIdentifier  *string          `json:"safety_identifier"`
-	ServiceTier       *ServiceTier     `json:"service_tier"`
-	StrictJSONSchema  *bool            `json:"strict_json_schema"`
-	TextVerbosity     *TextVerbosity   `json:"text_verbosity"`
-	User              *string          `json:"user"`
+	Include            []IncludeType    `json:"include"`
+	Instructions       *string          `json:"instructions"`
+	Logprobs           any              `json:"logprobs"`
+	MaxToolCalls       *int64           `json:"max_tool_calls"`
+	Metadata           map[string]any   `json:"metadata"`
+	ParallelToolCalls  *bool            `json:"parallel_tool_calls"`
+	PreviousResponseID *string          `json:"previous_response_id"`
+	PromptCacheKey     *string          `json:"prompt_cache_key"`
+	ReasoningEffort    *ReasoningEffort `json:"reasoning_effort"`
+	ReasoningSummary   *string          `json:"reasoning_summary"`
+	SafetyIdentifier   *string          `json:"safety_identifier"`
+	ServiceTier        *ServiceTier     `json:"service_tier"`
+	Store              *bool            `json:"store"`
+	StrictJSONSchema   *bool            `json:"strict_json_schema"`
+	TextVerbosity      *TextVerbosity   `json:"text_verbosity"`
+	User               *string          `json:"user"`
 }
 
 // Options implements the ProviderOptions interface.