language_model_hooks.go

  1package openai
  2
  3import (
  4	"encoding/base64"
  5	"fmt"
  6	"strings"
  7
  8	"charm.land/fantasy"
  9	"github.com/openai/openai-go/v2"
 10	"github.com/openai/openai-go/v2/packages/param"
 11	"github.com/openai/openai-go/v2/shared"
 12)
 13
// LanguageModelPrepareCallFunc is a function that prepares the call for the language model.
// It receives the target model, a pointer to the OpenAI chat-completion request
// parameters (a pointer, so the hook can modify the request in place) and the
// originating call. It returns any warnings produced while preparing the
// request, plus an error.
type LanguageModelPrepareCallFunc = func(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error)

// LanguageModelMapFinishReasonFunc is a function that maps the finish reason for the language model.
// It translates the finish reason string into a fantasy.FinishReason.
type LanguageModelMapFinishReasonFunc = func(finishReason string) fantasy.FinishReason

// LanguageModelUsageFunc is a function that calculates usage for the language model.
// Note that the argument is a whole openai.ChatCompletion response even though
// the parameter is named choice. It returns the usage together with
// provider-specific data.
type LanguageModelUsageFunc = func(choice openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData)

// LanguageModelExtraContentFunc is a function that adds extra content for the language model.
// It receives a single completion choice and returns a slice of fantasy.Content.
type LanguageModelExtraContentFunc = func(choice openai.ChatCompletionChoice) []fantasy.Content

// LanguageModelStreamExtraFunc is a function that handles stream extra functionality for the language model.
// It receives the current chunk, a yield callback for emitting stream parts and
// a free-form context map.
// NOTE(review): the returned map is presumably the updated context and the
// bool presumably reports whether streaming should continue — confirm against
// the caller.
type LanguageModelStreamExtraFunc = func(chunk openai.ChatCompletionChunk, yield func(fantasy.StreamPart) bool, ctx map[string]any) (map[string]any, bool)

// LanguageModelStreamUsageFunc is a function that calculates stream usage for the language model.
// It receives the current chunk, a free-form context map and provider metadata,
// and returns the usage together with provider metadata.
type LanguageModelStreamUsageFunc = func(chunk openai.ChatCompletionChunk, ctx map[string]any, metadata fantasy.ProviderMetadata) (fantasy.Usage, fantasy.ProviderMetadata)

// LanguageModelStreamProviderMetadataFunc is a function that handles stream provider metadata for the language model.
// It receives a completion choice and provider metadata, and returns provider
// metadata.
type LanguageModelStreamProviderMetadataFunc = func(choice openai.ChatCompletionChoice, metadata fantasy.ProviderMetadata) fantasy.ProviderMetadata

// LanguageModelToPromptFunc is a function that handles converting fantasy prompts to openai sdk messages.
// Besides the prompt it receives the provider and model names, and it returns
// the converted messages together with any conversion warnings.
type LanguageModelToPromptFunc = func(prompt fantasy.Prompt, provider, model string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning)
 37
 38// DefaultPrepareCallFunc is the default implementation for preparing a call to the language model.
 39func DefaultPrepareCallFunc(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error) {
 40	if call.ProviderOptions == nil {
 41		return nil, nil
 42	}
 43	var warnings []fantasy.CallWarning
 44	providerOptions := &ProviderOptions{}
 45	if v, ok := call.ProviderOptions[Name]; ok {
 46		providerOptions, ok = v.(*ProviderOptions)
 47		if !ok {
 48			return nil, fantasy.NewInvalidArgumentError("providerOptions", "openai provider options should be *openai.ProviderOptions", nil)
 49		}
 50	}
 51
 52	if providerOptions.LogitBias != nil {
 53		params.LogitBias = providerOptions.LogitBias
 54	}
 55	if providerOptions.LogProbs != nil && providerOptions.TopLogProbs != nil {
 56		providerOptions.LogProbs = nil
 57	}
 58	if providerOptions.LogProbs != nil {
 59		params.Logprobs = param.NewOpt(*providerOptions.LogProbs)
 60	}
 61	if providerOptions.TopLogProbs != nil {
 62		params.TopLogprobs = param.NewOpt(*providerOptions.TopLogProbs)
 63	}
 64	if providerOptions.User != nil {
 65		params.User = param.NewOpt(*providerOptions.User)
 66	}
 67	if providerOptions.ParallelToolCalls != nil {
 68		params.ParallelToolCalls = param.NewOpt(*providerOptions.ParallelToolCalls)
 69	}
 70	if providerOptions.MaxCompletionTokens != nil {
 71		params.MaxCompletionTokens = param.NewOpt(*providerOptions.MaxCompletionTokens)
 72	}
 73
 74	if providerOptions.TextVerbosity != nil {
 75		params.Verbosity = openai.ChatCompletionNewParamsVerbosity(*providerOptions.TextVerbosity)
 76	}
 77	if providerOptions.Prediction != nil {
 78		// Convert map[string]any to ChatCompletionPredictionContentParam
 79		if content, ok := providerOptions.Prediction["content"]; ok {
 80			if contentStr, ok := content.(string); ok {
 81				params.Prediction = openai.ChatCompletionPredictionContentParam{
 82					Content: openai.ChatCompletionPredictionContentContentUnionParam{
 83						OfString: param.NewOpt(contentStr),
 84					},
 85				}
 86			}
 87		}
 88	}
 89	if providerOptions.Store != nil {
 90		params.Store = param.NewOpt(*providerOptions.Store)
 91	}
 92	if providerOptions.Metadata != nil {
 93		// Convert map[string]any to map[string]string
 94		metadata := make(map[string]string)
 95		for k, v := range providerOptions.Metadata {
 96			if str, ok := v.(string); ok {
 97				metadata[k] = str
 98			}
 99		}
100		params.Metadata = metadata
101	}
102	if providerOptions.PromptCacheKey != nil {
103		params.PromptCacheKey = param.NewOpt(*providerOptions.PromptCacheKey)
104	}
105	if providerOptions.SafetyIdentifier != nil {
106		params.SafetyIdentifier = param.NewOpt(*providerOptions.SafetyIdentifier)
107	}
108	if providerOptions.ServiceTier != nil {
109		params.ServiceTier = openai.ChatCompletionNewParamsServiceTier(*providerOptions.ServiceTier)
110	}
111
112	if providerOptions.ReasoningEffort != nil {
113		switch *providerOptions.ReasoningEffort {
114		case ReasoningEffortMinimal:
115			params.ReasoningEffort = shared.ReasoningEffortMinimal
116		case ReasoningEffortLow:
117			params.ReasoningEffort = shared.ReasoningEffortLow
118		case ReasoningEffortMedium:
119			params.ReasoningEffort = shared.ReasoningEffortMedium
120		case ReasoningEffortHigh:
121			params.ReasoningEffort = shared.ReasoningEffortHigh
122		default:
123			return nil, fmt.Errorf("reasoning model `%s` not supported", *providerOptions.ReasoningEffort)
124		}
125	}
126
127	if isReasoningModel(model.Model()) {
128		if providerOptions.LogitBias != nil {
129			params.LogitBias = nil
130			warnings = append(warnings, fantasy.CallWarning{
131				Type:    fantasy.CallWarningTypeUnsupportedSetting,
132				Setting: "LogitBias",
133				Message: "LogitBias is not supported for reasoning models",
134			})
135		}
136		if providerOptions.LogProbs != nil {
137			params.Logprobs = param.Opt[bool]{}
138			warnings = append(warnings, fantasy.CallWarning{
139				Type:    fantasy.CallWarningTypeUnsupportedSetting,
140				Setting: "Logprobs",
141				Message: "Logprobs is not supported for reasoning models",
142			})
143		}
144		if providerOptions.TopLogProbs != nil {
145			params.TopLogprobs = param.Opt[int64]{}
146			warnings = append(warnings, fantasy.CallWarning{
147				Type:    fantasy.CallWarningTypeUnsupportedSetting,
148				Setting: "TopLogprobs",
149				Message: "TopLogprobs is not supported for reasoning models",
150			})
151		}
152	}
153
154	// Handle service tier validation
155	if providerOptions.ServiceTier != nil {
156		serviceTier := *providerOptions.ServiceTier
157		if serviceTier == "flex" && !supportsFlexProcessing(model.Model()) {
158			params.ServiceTier = ""
159			warnings = append(warnings, fantasy.CallWarning{
160				Type:    fantasy.CallWarningTypeUnsupportedSetting,
161				Setting: "ServiceTier",
162				Details: "flex processing is only available for o3, o4-mini, and gpt-5 models",
163			})
164		} else if serviceTier == "priority" && !supportsPriorityProcessing(model.Model()) {
165			params.ServiceTier = ""
166			warnings = append(warnings, fantasy.CallWarning{
167				Type:    fantasy.CallWarningTypeUnsupportedSetting,
168				Setting: "ServiceTier",
169				Details: "priority processing is only available for supported models (gpt-4, gpt-5, gpt-5-mini, o3, o4-mini) and requires Enterprise access. gpt-5-nano is not supported",
170			})
171		}
172	}
173	return warnings, nil
174}
175
176// DefaultMapFinishReasonFunc is the default implementation for mapping finish reasons.
177func DefaultMapFinishReasonFunc(finishReason string) fantasy.FinishReason {
178	switch finishReason {
179	case "stop":
180		return fantasy.FinishReasonStop
181	case "length":
182		return fantasy.FinishReasonLength
183	case "content_filter":
184		return fantasy.FinishReasonContentFilter
185	case "function_call", "tool_calls":
186		return fantasy.FinishReasonToolCalls
187	default:
188		return fantasy.FinishReasonUnknown
189	}
190}
191
192// DefaultUsageFunc is the default implementation for calculating usage.
193func DefaultUsageFunc(response openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData) {
194	completionTokenDetails := response.Usage.CompletionTokensDetails
195	promptTokenDetails := response.Usage.PromptTokensDetails
196
197	// Build provider metadata
198	providerMetadata := &ProviderMetadata{}
199
200	// Add logprobs if available
201	if len(response.Choices) > 0 && len(response.Choices[0].Logprobs.Content) > 0 {
202		providerMetadata.Logprobs = response.Choices[0].Logprobs.Content
203	}
204
205	// Add prediction tokens if available
206	if completionTokenDetails.AcceptedPredictionTokens > 0 || completionTokenDetails.RejectedPredictionTokens > 0 {
207		if completionTokenDetails.AcceptedPredictionTokens > 0 {
208			providerMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
209		}
210		if completionTokenDetails.RejectedPredictionTokens > 0 {
211			providerMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
212		}
213	}
214	return fantasy.Usage{
215		InputTokens:     response.Usage.PromptTokens,
216		OutputTokens:    response.Usage.CompletionTokens,
217		TotalTokens:     response.Usage.TotalTokens,
218		ReasoningTokens: completionTokenDetails.ReasoningTokens,
219		CacheReadTokens: promptTokenDetails.CachedTokens,
220	}, providerMetadata
221}
222
223// DefaultStreamUsageFunc is the default implementation for calculating stream usage.
224func DefaultStreamUsageFunc(chunk openai.ChatCompletionChunk, _ map[string]any, metadata fantasy.ProviderMetadata) (fantasy.Usage, fantasy.ProviderMetadata) {
225	if chunk.Usage.TotalTokens == 0 {
226		return fantasy.Usage{}, nil
227	}
228	streamProviderMetadata := &ProviderMetadata{}
229	if metadata != nil {
230		if providerMetadata, ok := metadata[Name]; ok {
231			converted, ok := providerMetadata.(*ProviderMetadata)
232			if ok {
233				streamProviderMetadata = converted
234			}
235		}
236	}
237	// we do this here because the acc does not add prompt details
238	completionTokenDetails := chunk.Usage.CompletionTokensDetails
239	promptTokenDetails := chunk.Usage.PromptTokensDetails
240	usage := fantasy.Usage{
241		InputTokens:     chunk.Usage.PromptTokens,
242		OutputTokens:    chunk.Usage.CompletionTokens,
243		TotalTokens:     chunk.Usage.TotalTokens,
244		ReasoningTokens: completionTokenDetails.ReasoningTokens,
245		CacheReadTokens: promptTokenDetails.CachedTokens,
246	}
247
248	// Add prediction tokens if available
249	if completionTokenDetails.AcceptedPredictionTokens > 0 || completionTokenDetails.RejectedPredictionTokens > 0 {
250		if completionTokenDetails.AcceptedPredictionTokens > 0 {
251			streamProviderMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
252		}
253		if completionTokenDetails.RejectedPredictionTokens > 0 {
254			streamProviderMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
255		}
256	}
257
258	return usage, fantasy.ProviderMetadata{
259		Name: streamProviderMetadata,
260	}
261}
262
263// DefaultStreamProviderMetadataFunc is the default implementation for handling stream provider metadata.
264func DefaultStreamProviderMetadataFunc(choice openai.ChatCompletionChoice, metadata fantasy.ProviderMetadata) fantasy.ProviderMetadata {
265	streamProviderMetadata, ok := metadata[Name]
266	if !ok {
267		streamProviderMetadata = &ProviderMetadata{}
268	}
269	if converted, ok := streamProviderMetadata.(*ProviderMetadata); ok {
270		converted.Logprobs = choice.Logprobs.Content
271		metadata[Name] = converted
272	}
273	return metadata
274}
275
276// DefaultToPrompt converts a fantasy prompt to OpenAI format with default handling.
277func DefaultToPrompt(prompt fantasy.Prompt, _, _ string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
278	var messages []openai.ChatCompletionMessageParamUnion
279	var warnings []fantasy.CallWarning
280	for _, msg := range prompt {
281		switch msg.Role {
282		case fantasy.MessageRoleSystem:
283			var systemPromptParts []string
284			for _, c := range msg.Content {
285				if c.GetType() != fantasy.ContentTypeText {
286					warnings = append(warnings, fantasy.CallWarning{
287						Type:    fantasy.CallWarningTypeOther,
288						Message: "system prompt can only have text content",
289					})
290					continue
291				}
292				textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
293				if !ok {
294					warnings = append(warnings, fantasy.CallWarning{
295						Type:    fantasy.CallWarningTypeOther,
296						Message: "system prompt text part does not have the right type",
297					})
298					continue
299				}
300				text := textPart.Text
301				if strings.TrimSpace(text) != "" {
302					systemPromptParts = append(systemPromptParts, textPart.Text)
303				}
304			}
305			if len(systemPromptParts) == 0 {
306				warnings = append(warnings, fantasy.CallWarning{
307					Type:    fantasy.CallWarningTypeOther,
308					Message: "system prompt has no text parts",
309				})
310				continue
311			}
312			messages = append(messages, openai.SystemMessage(strings.Join(systemPromptParts, "\n")))
313		case fantasy.MessageRoleUser:
314			// simple user message just text content
315			if len(msg.Content) == 1 && msg.Content[0].GetType() == fantasy.ContentTypeText {
316				textPart, ok := fantasy.AsContentType[fantasy.TextPart](msg.Content[0])
317				if !ok {
318					warnings = append(warnings, fantasy.CallWarning{
319						Type:    fantasy.CallWarningTypeOther,
320						Message: "user message text part does not have the right type",
321					})
322					continue
323				}
324				messages = append(messages, openai.UserMessage(textPart.Text))
325				continue
326			}
327			// text content and attachments
328			// for now we only support image content later we need to check
329			// TODO: add the supported media types to the language model so we
330			//  can use that to validate the data here.
331			var content []openai.ChatCompletionContentPartUnionParam
332			for _, c := range msg.Content {
333				switch c.GetType() {
334				case fantasy.ContentTypeText:
335					textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
336					if !ok {
337						warnings = append(warnings, fantasy.CallWarning{
338							Type:    fantasy.CallWarningTypeOther,
339							Message: "user message text part does not have the right type",
340						})
341						continue
342					}
343					content = append(content, openai.ChatCompletionContentPartUnionParam{
344						OfText: &openai.ChatCompletionContentPartTextParam{
345							Text: textPart.Text,
346						},
347					})
348				case fantasy.ContentTypeFile:
349					filePart, ok := fantasy.AsContentType[fantasy.FilePart](c)
350					if !ok {
351						warnings = append(warnings, fantasy.CallWarning{
352							Type:    fantasy.CallWarningTypeOther,
353							Message: "user message file part does not have the right type",
354						})
355						continue
356					}
357
358					switch {
359					case strings.HasPrefix(filePart.MediaType, "image/"):
360						// Handle image files
361						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
362						data := "data:" + filePart.MediaType + ";base64," + base64Encoded
363						imageURL := openai.ChatCompletionContentPartImageImageURLParam{URL: data}
364
365						// Check for provider-specific options like image detail
366						if providerOptions, ok := filePart.ProviderOptions[Name]; ok {
367							if detail, ok := providerOptions.(*ProviderFileOptions); ok {
368								imageURL.Detail = detail.ImageDetail
369							}
370						}
371
372						imageBlock := openai.ChatCompletionContentPartImageParam{ImageURL: imageURL}
373						content = append(content, openai.ChatCompletionContentPartUnionParam{OfImageURL: &imageBlock})
374
375					case filePart.MediaType == "audio/wav":
376						// Handle WAV audio files
377						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
378						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
379							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
380								Data:   base64Encoded,
381								Format: "wav",
382							},
383						}
384						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})
385
386					case filePart.MediaType == "audio/mpeg" || filePart.MediaType == "audio/mp3":
387						// Handle MP3 audio files
388						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
389						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
390							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
391								Data:   base64Encoded,
392								Format: "mp3",
393							},
394						}
395						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})
396
397					case filePart.MediaType == "application/pdf":
398						// Handle PDF files
399						dataStr := string(filePart.Data)
400
401						// Check if data looks like a file ID (starts with "file-")
402						if strings.HasPrefix(dataStr, "file-") {
403							fileBlock := openai.ChatCompletionContentPartFileParam{
404								File: openai.ChatCompletionContentPartFileFileParam{
405									FileID: param.NewOpt(dataStr),
406								},
407							}
408							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
409						} else {
410							// Handle as base64 data
411							base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
412							data := "data:application/pdf;base64," + base64Encoded
413
414							filename := filePart.Filename
415							if filename == "" {
416								// Generate default filename based on content index
417								filename = fmt.Sprintf("part-%d.pdf", len(content))
418							}
419
420							fileBlock := openai.ChatCompletionContentPartFileParam{
421								File: openai.ChatCompletionContentPartFileFileParam{
422									Filename: param.NewOpt(filename),
423									FileData: param.NewOpt(data),
424								},
425							}
426							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
427						}
428
429					default:
430						warnings = append(warnings, fantasy.CallWarning{
431							Type:    fantasy.CallWarningTypeOther,
432							Message: fmt.Sprintf("file part media type %s not supported", filePart.MediaType),
433						})
434					}
435				}
436			}
437			messages = append(messages, openai.UserMessage(content))
438		case fantasy.MessageRoleAssistant:
439			// simple assistant message just text content
440			if len(msg.Content) == 1 && msg.Content[0].GetType() == fantasy.ContentTypeText {
441				textPart, ok := fantasy.AsContentType[fantasy.TextPart](msg.Content[0])
442				if !ok {
443					warnings = append(warnings, fantasy.CallWarning{
444						Type:    fantasy.CallWarningTypeOther,
445						Message: "assistant message text part does not have the right type",
446					})
447					continue
448				}
449				messages = append(messages, openai.AssistantMessage(textPart.Text))
450				continue
451			}
452			assistantMsg := openai.ChatCompletionAssistantMessageParam{
453				Role: "assistant",
454			}
455			for _, c := range msg.Content {
456				switch c.GetType() {
457				case fantasy.ContentTypeText:
458					textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
459					if !ok {
460						warnings = append(warnings, fantasy.CallWarning{
461							Type:    fantasy.CallWarningTypeOther,
462							Message: "assistant message text part does not have the right type",
463						})
464						continue
465					}
466					assistantMsg.Content = openai.ChatCompletionAssistantMessageParamContentUnion{
467						OfString: param.NewOpt(textPart.Text),
468					}
469				case fantasy.ContentTypeToolCall:
470					toolCallPart, ok := fantasy.AsContentType[fantasy.ToolCallPart](c)
471					if !ok {
472						warnings = append(warnings, fantasy.CallWarning{
473							Type:    fantasy.CallWarningTypeOther,
474							Message: "assistant message tool part does not have the right type",
475						})
476						continue
477					}
478					assistantMsg.ToolCalls = append(assistantMsg.ToolCalls,
479						openai.ChatCompletionMessageToolCallUnionParam{
480							OfFunction: &openai.ChatCompletionMessageFunctionToolCallParam{
481								ID:   toolCallPart.ToolCallID,
482								Type: "function",
483								Function: openai.ChatCompletionMessageFunctionToolCallFunctionParam{
484									Name:      toolCallPart.ToolName,
485									Arguments: toolCallPart.Input,
486								},
487							},
488						})
489				}
490			}
491			messages = append(messages, openai.ChatCompletionMessageParamUnion{
492				OfAssistant: &assistantMsg,
493			})
494		case fantasy.MessageRoleTool:
495			for _, c := range msg.Content {
496				if c.GetType() != fantasy.ContentTypeToolResult {
497					warnings = append(warnings, fantasy.CallWarning{
498						Type:    fantasy.CallWarningTypeOther,
499						Message: "tool message can only have tool result content",
500					})
501					continue
502				}
503
504				toolResultPart, ok := fantasy.AsContentType[fantasy.ToolResultPart](c)
505				if !ok {
506					warnings = append(warnings, fantasy.CallWarning{
507						Type:    fantasy.CallWarningTypeOther,
508						Message: "tool message result part does not have the right type",
509					})
510					continue
511				}
512
513				switch toolResultPart.Output.GetType() {
514				case fantasy.ToolResultContentTypeText:
515					output, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentText](toolResultPart.Output)
516					if !ok {
517						warnings = append(warnings, fantasy.CallWarning{
518							Type:    fantasy.CallWarningTypeOther,
519							Message: "tool result output does not have the right type",
520						})
521						continue
522					}
523					messages = append(messages, openai.ToolMessage(output.Text, toolResultPart.ToolCallID))
524				case fantasy.ToolResultContentTypeError:
525					// TODO: check if better handling is needed
526					output, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentError](toolResultPart.Output)
527					if !ok {
528						warnings = append(warnings, fantasy.CallWarning{
529							Type:    fantasy.CallWarningTypeOther,
530							Message: "tool result output does not have the right type",
531						})
532						continue
533					}
534					messages = append(messages, openai.ToolMessage(output.Error.Error(), toolResultPart.ToolCallID))
535				}
536			}
537		}
538	}
539	return messages, warnings
540}