language_model_hooks.go

  1package openai
  2
  3import (
  4	"encoding/base64"
  5	"fmt"
  6	"strings"
  7
  8	"charm.land/fantasy"
  9	"github.com/openai/openai-go/v2"
 10	"github.com/openai/openai-go/v2/packages/param"
 11	"github.com/openai/openai-go/v2/shared"
 12)
 13
// LanguageModelPrepareCallFunc is the signature of the hook that prepares a
// chat completion request for a call. It receives the language model, a pointer
// to the request params (so the hook can modify them in place) and the call,
// and returns the warnings it produced together with an error.
// DefaultPrepareCallFunc has this signature.
type LanguageModelPrepareCallFunc = func(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error)
 16
// LanguageModelMapFinishReasonFunc is the signature of the hook that maps a
// finish reason string to a fantasy.FinishReason.
// DefaultMapFinishReasonFunc has this signature.
type LanguageModelMapFinishReasonFunc = func(finishReason string) fantasy.FinishReason
 19
// LanguageModelUsageFunc is the signature of the hook that derives token usage
// and provider-specific data from a chat completion response. Note that the
// parameter is named choice, but its type is the whole openai.ChatCompletion.
// DefaultUsageFunc has this signature.
type LanguageModelUsageFunc = func(choice openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData)
 22
// LanguageModelExtraContentFunc is the signature of the hook that returns a
// slice of fantasy.Content for a single chat completion choice.
// NOTE(review): presumably the result is added to the content the provider
// already extracts from the choice — confirm at the call site.
type LanguageModelExtraContentFunc = func(choice openai.ChatCompletionChoice) []fantasy.Content
 25
// LanguageModelStreamExtraFunc is the signature of the hook that is given a
// streamed chat completion chunk, a yield callback for emitting
// fantasy.StreamPart values, and a map[string]any context. It returns a
// map[string]any and a bool.
// NOTE(review): presumably the returned map is the (possibly updated) context
// and the bool reports whether streaming should continue — confirm against the
// caller.
type LanguageModelStreamExtraFunc = func(chunk openai.ChatCompletionChunk, yield func(fantasy.StreamPart) bool, ctx map[string]any) (map[string]any, bool)
 28
// LanguageModelStreamUsageFunc is the signature of the hook that derives token
// usage from a streamed chat completion chunk. It receives the chunk, a
// map[string]any context and the provider metadata supplied by the caller, and
// returns the usage together with provider metadata.
// DefaultStreamUsageFunc has this signature.
type LanguageModelStreamUsageFunc = func(chunk openai.ChatCompletionChunk, ctx map[string]any, metadata fantasy.ProviderMetadata) (fantasy.Usage, fantasy.ProviderMetadata)
 31
// LanguageModelStreamProviderMetadataFunc is the signature of the hook that
// takes a chat completion choice and the current provider metadata and returns
// provider metadata. DefaultStreamProviderMetadataFunc has this signature and
// uses it to record the choice's logprobs.
type LanguageModelStreamProviderMetadataFunc = func(choice openai.ChatCompletionChoice, metadata fantasy.ProviderMetadata) fantasy.ProviderMetadata
 34
// LanguageModelToPromptFunc is the signature of the hook that converts a
// fantasy.Prompt into OpenAI SDK chat completion messages. It also receives the
// provider and model as strings, and returns the converted messages together
// with any warnings raised during conversion.
// DefaultToPrompt has this signature.
type LanguageModelToPromptFunc = func(prompt fantasy.Prompt, provider, model string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning)
 37
 38// DefaultPrepareCallFunc is the default implementation for preparing a call to the language model.
 39func DefaultPrepareCallFunc(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error) {
 40	if call.ProviderOptions == nil {
 41		return nil, nil
 42	}
 43	var warnings []fantasy.CallWarning
 44	providerOptions := &ProviderOptions{}
 45	if v, ok := call.ProviderOptions[Name]; ok {
 46		providerOptions, ok = v.(*ProviderOptions)
 47		if !ok {
 48			return nil, &fantasy.Error{Title: "invalid argument", Message: "openai provider options should be *openai.ProviderOptions"}
 49		}
 50	}
 51
 52	if providerOptions.LogitBias != nil {
 53		params.LogitBias = providerOptions.LogitBias
 54	}
 55	if providerOptions.LogProbs != nil && providerOptions.TopLogProbs != nil {
 56		providerOptions.LogProbs = nil
 57	}
 58	if providerOptions.LogProbs != nil {
 59		params.Logprobs = param.NewOpt(*providerOptions.LogProbs)
 60	}
 61	if providerOptions.TopLogProbs != nil {
 62		params.TopLogprobs = param.NewOpt(*providerOptions.TopLogProbs)
 63	}
 64	if providerOptions.User != nil {
 65		params.User = param.NewOpt(*providerOptions.User)
 66	}
 67	if providerOptions.ParallelToolCalls != nil {
 68		params.ParallelToolCalls = param.NewOpt(*providerOptions.ParallelToolCalls)
 69	}
 70	if providerOptions.MaxCompletionTokens != nil {
 71		params.MaxCompletionTokens = param.NewOpt(*providerOptions.MaxCompletionTokens)
 72	}
 73
 74	if providerOptions.TextVerbosity != nil {
 75		params.Verbosity = openai.ChatCompletionNewParamsVerbosity(*providerOptions.TextVerbosity)
 76	}
 77	if providerOptions.Prediction != nil {
 78		// Convert map[string]any to ChatCompletionPredictionContentParam
 79		if content, ok := providerOptions.Prediction["content"]; ok {
 80			if contentStr, ok := content.(string); ok {
 81				params.Prediction = openai.ChatCompletionPredictionContentParam{
 82					Content: openai.ChatCompletionPredictionContentContentUnionParam{
 83						OfString: param.NewOpt(contentStr),
 84					},
 85				}
 86			}
 87		}
 88	}
 89	if providerOptions.Store != nil {
 90		params.Store = param.NewOpt(*providerOptions.Store)
 91	}
 92	if providerOptions.Metadata != nil {
 93		// Convert map[string]any to map[string]string
 94		metadata := make(map[string]string)
 95		for k, v := range providerOptions.Metadata {
 96			if str, ok := v.(string); ok {
 97				metadata[k] = str
 98			}
 99		}
100		params.Metadata = metadata
101	}
102	if providerOptions.PromptCacheKey != nil {
103		params.PromptCacheKey = param.NewOpt(*providerOptions.PromptCacheKey)
104	}
105	if providerOptions.SafetyIdentifier != nil {
106		params.SafetyIdentifier = param.NewOpt(*providerOptions.SafetyIdentifier)
107	}
108	if providerOptions.ServiceTier != nil {
109		params.ServiceTier = openai.ChatCompletionNewParamsServiceTier(*providerOptions.ServiceTier)
110	}
111
112	if providerOptions.ReasoningEffort != nil {
113		switch *providerOptions.ReasoningEffort {
114		case ReasoningEffortMinimal:
115			params.ReasoningEffort = shared.ReasoningEffortMinimal
116		case ReasoningEffortLow:
117			params.ReasoningEffort = shared.ReasoningEffortLow
118		case ReasoningEffortMedium:
119			params.ReasoningEffort = shared.ReasoningEffortMedium
120		case ReasoningEffortHigh:
121			params.ReasoningEffort = shared.ReasoningEffortHigh
122		default:
123			return nil, fmt.Errorf("reasoning model `%s` not supported", *providerOptions.ReasoningEffort)
124		}
125	}
126
127	if isReasoningModel(model.Model()) {
128		if providerOptions.LogitBias != nil {
129			params.LogitBias = nil
130			warnings = append(warnings, fantasy.CallWarning{
131				Type:    fantasy.CallWarningTypeUnsupportedSetting,
132				Setting: "LogitBias",
133				Message: "LogitBias is not supported for reasoning models",
134			})
135		}
136		if providerOptions.LogProbs != nil {
137			params.Logprobs = param.Opt[bool]{}
138			warnings = append(warnings, fantasy.CallWarning{
139				Type:    fantasy.CallWarningTypeUnsupportedSetting,
140				Setting: "Logprobs",
141				Message: "Logprobs is not supported for reasoning models",
142			})
143		}
144		if providerOptions.TopLogProbs != nil {
145			params.TopLogprobs = param.Opt[int64]{}
146			warnings = append(warnings, fantasy.CallWarning{
147				Type:    fantasy.CallWarningTypeUnsupportedSetting,
148				Setting: "TopLogprobs",
149				Message: "TopLogprobs is not supported for reasoning models",
150			})
151		}
152	}
153
154	// Handle service tier validation
155	if providerOptions.ServiceTier != nil {
156		serviceTier := *providerOptions.ServiceTier
157		if serviceTier == "flex" && !supportsFlexProcessing(model.Model()) {
158			params.ServiceTier = ""
159			warnings = append(warnings, fantasy.CallWarning{
160				Type:    fantasy.CallWarningTypeUnsupportedSetting,
161				Setting: "ServiceTier",
162				Details: "flex processing is only available for o3, o4-mini, and gpt-5 models",
163			})
164		} else if serviceTier == "priority" && !supportsPriorityProcessing(model.Model()) {
165			params.ServiceTier = ""
166			warnings = append(warnings, fantasy.CallWarning{
167				Type:    fantasy.CallWarningTypeUnsupportedSetting,
168				Setting: "ServiceTier",
169				Details: "priority processing is only available for supported models (gpt-4, gpt-5, gpt-5-mini, o3, o4-mini) and requires Enterprise access. gpt-5-nano is not supported",
170			})
171		}
172	}
173	return warnings, nil
174}
175
176// DefaultMapFinishReasonFunc is the default implementation for mapping finish reasons.
177func DefaultMapFinishReasonFunc(finishReason string) fantasy.FinishReason {
178	switch finishReason {
179	case "stop":
180		return fantasy.FinishReasonStop
181	case "length":
182		return fantasy.FinishReasonLength
183	case "content_filter":
184		return fantasy.FinishReasonContentFilter
185	case "function_call", "tool_calls":
186		return fantasy.FinishReasonToolCalls
187	default:
188		return fantasy.FinishReasonUnknown
189	}
190}
191
192// DefaultUsageFunc is the default implementation for calculating usage.
193func DefaultUsageFunc(response openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData) {
194	completionTokenDetails := response.Usage.CompletionTokensDetails
195	promptTokenDetails := response.Usage.PromptTokensDetails
196
197	// Build provider metadata
198	providerMetadata := &ProviderMetadata{}
199
200	// Add logprobs if available
201	if len(response.Choices) > 0 && len(response.Choices[0].Logprobs.Content) > 0 {
202		providerMetadata.Logprobs = response.Choices[0].Logprobs.Content
203	}
204
205	// Add prediction tokens if available
206	if completionTokenDetails.AcceptedPredictionTokens > 0 || completionTokenDetails.RejectedPredictionTokens > 0 {
207		if completionTokenDetails.AcceptedPredictionTokens > 0 {
208			providerMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
209		}
210		if completionTokenDetails.RejectedPredictionTokens > 0 {
211			providerMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
212		}
213	}
214	return fantasy.Usage{
215		InputTokens:     response.Usage.PromptTokens,
216		OutputTokens:    response.Usage.CompletionTokens,
217		TotalTokens:     response.Usage.TotalTokens,
218		ReasoningTokens: completionTokenDetails.ReasoningTokens,
219		CacheReadTokens: promptTokenDetails.CachedTokens,
220	}, providerMetadata
221}
222
223// DefaultStreamUsageFunc is the default implementation for calculating stream usage.
224func DefaultStreamUsageFunc(chunk openai.ChatCompletionChunk, _ map[string]any, metadata fantasy.ProviderMetadata) (fantasy.Usage, fantasy.ProviderMetadata) {
225	if chunk.Usage.TotalTokens == 0 {
226		return fantasy.Usage{}, nil
227	}
228	streamProviderMetadata := &ProviderMetadata{}
229	if metadata != nil {
230		if providerMetadata, ok := metadata[Name]; ok {
231			converted, ok := providerMetadata.(*ProviderMetadata)
232			if ok {
233				streamProviderMetadata = converted
234			}
235		}
236	}
237	// we do this here because the acc does not add prompt details
238	completionTokenDetails := chunk.Usage.CompletionTokensDetails
239	promptTokenDetails := chunk.Usage.PromptTokensDetails
240	usage := fantasy.Usage{
241		InputTokens:     chunk.Usage.PromptTokens,
242		OutputTokens:    chunk.Usage.CompletionTokens,
243		TotalTokens:     chunk.Usage.TotalTokens,
244		ReasoningTokens: completionTokenDetails.ReasoningTokens,
245		CacheReadTokens: promptTokenDetails.CachedTokens,
246	}
247
248	// Add prediction tokens if available
249	if completionTokenDetails.AcceptedPredictionTokens > 0 || completionTokenDetails.RejectedPredictionTokens > 0 {
250		if completionTokenDetails.AcceptedPredictionTokens > 0 {
251			streamProviderMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
252		}
253		if completionTokenDetails.RejectedPredictionTokens > 0 {
254			streamProviderMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
255		}
256	}
257
258	return usage, fantasy.ProviderMetadata{
259		Name: streamProviderMetadata,
260	}
261}
262
263// DefaultStreamProviderMetadataFunc is the default implementation for handling stream provider metadata.
264func DefaultStreamProviderMetadataFunc(choice openai.ChatCompletionChoice, metadata fantasy.ProviderMetadata) fantasy.ProviderMetadata {
265	if metadata == nil {
266		metadata = fantasy.ProviderMetadata{}
267	}
268	streamProviderMetadata, ok := metadata[Name]
269	if !ok {
270		streamProviderMetadata = &ProviderMetadata{}
271	}
272	if converted, ok := streamProviderMetadata.(*ProviderMetadata); ok {
273		converted.Logprobs = choice.Logprobs.Content
274		metadata[Name] = converted
275	}
276	return metadata
277}
278
279// DefaultToPrompt converts a fantasy prompt to OpenAI format with default handling.
280func DefaultToPrompt(prompt fantasy.Prompt, _, _ string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
281	var messages []openai.ChatCompletionMessageParamUnion
282	var warnings []fantasy.CallWarning
283	for _, msg := range prompt {
284		switch msg.Role {
285		case fantasy.MessageRoleSystem:
286			var systemPromptParts []string
287			for _, c := range msg.Content {
288				if c.GetType() != fantasy.ContentTypeText {
289					warnings = append(warnings, fantasy.CallWarning{
290						Type:    fantasy.CallWarningTypeOther,
291						Message: "system prompt can only have text content",
292					})
293					continue
294				}
295				textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
296				if !ok {
297					warnings = append(warnings, fantasy.CallWarning{
298						Type:    fantasy.CallWarningTypeOther,
299						Message: "system prompt text part does not have the right type",
300					})
301					continue
302				}
303				text := textPart.Text
304				if strings.TrimSpace(text) != "" {
305					systemPromptParts = append(systemPromptParts, textPart.Text)
306				}
307			}
308			if len(systemPromptParts) == 0 {
309				warnings = append(warnings, fantasy.CallWarning{
310					Type:    fantasy.CallWarningTypeOther,
311					Message: "system prompt has no text parts",
312				})
313				continue
314			}
315			messages = append(messages, openai.SystemMessage(strings.Join(systemPromptParts, "\n")))
316		case fantasy.MessageRoleUser:
317			// simple user message just text content
318			if len(msg.Content) == 1 && msg.Content[0].GetType() == fantasy.ContentTypeText {
319				textPart, ok := fantasy.AsContentType[fantasy.TextPart](msg.Content[0])
320				if !ok {
321					warnings = append(warnings, fantasy.CallWarning{
322						Type:    fantasy.CallWarningTypeOther,
323						Message: "user message text part does not have the right type",
324					})
325					continue
326				}
327				messages = append(messages, openai.UserMessage(textPart.Text))
328				continue
329			}
330			// text content and attachments
331			// for now we only support image content later we need to check
332			// TODO: add the supported media types to the language model so we
333			//  can use that to validate the data here.
334			var content []openai.ChatCompletionContentPartUnionParam
335			for _, c := range msg.Content {
336				switch c.GetType() {
337				case fantasy.ContentTypeText:
338					textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
339					if !ok {
340						warnings = append(warnings, fantasy.CallWarning{
341							Type:    fantasy.CallWarningTypeOther,
342							Message: "user message text part does not have the right type",
343						})
344						continue
345					}
346					content = append(content, openai.ChatCompletionContentPartUnionParam{
347						OfText: &openai.ChatCompletionContentPartTextParam{
348							Text: textPart.Text,
349						},
350					})
351				case fantasy.ContentTypeFile:
352					filePart, ok := fantasy.AsContentType[fantasy.FilePart](c)
353					if !ok {
354						warnings = append(warnings, fantasy.CallWarning{
355							Type:    fantasy.CallWarningTypeOther,
356							Message: "user message file part does not have the right type",
357						})
358						continue
359					}
360
361					switch {
362					case strings.HasPrefix(filePart.MediaType, "text/"):
363						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
364						content = append(content, openai.FileContentPart(openai.ChatCompletionContentPartFileFileParam{
365							FileData: param.NewOpt(base64Encoded),
366						}))
367					case strings.HasPrefix(filePart.MediaType, "image/"):
368						// Handle image files
369						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
370						data := "data:" + filePart.MediaType + ";base64," + base64Encoded
371						imageURL := openai.ChatCompletionContentPartImageImageURLParam{URL: data}
372
373						// Check for provider-specific options like image detail
374						if providerOptions, ok := filePart.ProviderOptions[Name]; ok {
375							if detail, ok := providerOptions.(*ProviderFileOptions); ok {
376								imageURL.Detail = detail.ImageDetail
377							}
378						}
379
380						imageBlock := openai.ChatCompletionContentPartImageParam{ImageURL: imageURL}
381						content = append(content, openai.ChatCompletionContentPartUnionParam{OfImageURL: &imageBlock})
382
383					case filePart.MediaType == "audio/wav":
384						// Handle WAV audio files
385						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
386						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
387							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
388								Data:   base64Encoded,
389								Format: "wav",
390							},
391						}
392						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})
393
394					case filePart.MediaType == "audio/mpeg" || filePart.MediaType == "audio/mp3":
395						// Handle MP3 audio files
396						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
397						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
398							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
399								Data:   base64Encoded,
400								Format: "mp3",
401							},
402						}
403						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})
404
405					case filePart.MediaType == "application/pdf":
406						// Handle PDF files
407						dataStr := string(filePart.Data)
408
409						// Check if data looks like a file ID (starts with "file-")
410						if strings.HasPrefix(dataStr, "file-") {
411							fileBlock := openai.ChatCompletionContentPartFileParam{
412								File: openai.ChatCompletionContentPartFileFileParam{
413									FileID: param.NewOpt(dataStr),
414								},
415							}
416							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
417						} else {
418							// Handle as base64 data
419							base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
420							data := "data:application/pdf;base64," + base64Encoded
421
422							filename := filePart.Filename
423							if filename == "" {
424								// Generate default filename based on content index
425								filename = fmt.Sprintf("part-%d.pdf", len(content))
426							}
427
428							fileBlock := openai.ChatCompletionContentPartFileParam{
429								File: openai.ChatCompletionContentPartFileFileParam{
430									Filename: param.NewOpt(filename),
431									FileData: param.NewOpt(data),
432								},
433							}
434							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
435						}
436
437					default:
438						warnings = append(warnings, fantasy.CallWarning{
439							Type:    fantasy.CallWarningTypeOther,
440							Message: fmt.Sprintf("file part media type %s not supported", filePart.MediaType),
441						})
442					}
443				}
444			}
445			messages = append(messages, openai.UserMessage(content))
446		case fantasy.MessageRoleAssistant:
447			// simple assistant message just text content
448			if len(msg.Content) == 1 && msg.Content[0].GetType() == fantasy.ContentTypeText {
449				textPart, ok := fantasy.AsContentType[fantasy.TextPart](msg.Content[0])
450				if !ok {
451					warnings = append(warnings, fantasy.CallWarning{
452						Type:    fantasy.CallWarningTypeOther,
453						Message: "assistant message text part does not have the right type",
454					})
455					continue
456				}
457				messages = append(messages, openai.AssistantMessage(textPart.Text))
458				continue
459			}
460			assistantMsg := openai.ChatCompletionAssistantMessageParam{
461				Role: "assistant",
462			}
463			for _, c := range msg.Content {
464				switch c.GetType() {
465				case fantasy.ContentTypeText:
466					textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
467					if !ok {
468						warnings = append(warnings, fantasy.CallWarning{
469							Type:    fantasy.CallWarningTypeOther,
470							Message: "assistant message text part does not have the right type",
471						})
472						continue
473					}
474					assistantMsg.Content = openai.ChatCompletionAssistantMessageParamContentUnion{
475						OfString: param.NewOpt(textPart.Text),
476					}
477				case fantasy.ContentTypeToolCall:
478					toolCallPart, ok := fantasy.AsContentType[fantasy.ToolCallPart](c)
479					if !ok {
480						warnings = append(warnings, fantasy.CallWarning{
481							Type:    fantasy.CallWarningTypeOther,
482							Message: "assistant message tool part does not have the right type",
483						})
484						continue
485					}
486					assistantMsg.ToolCalls = append(assistantMsg.ToolCalls,
487						openai.ChatCompletionMessageToolCallUnionParam{
488							OfFunction: &openai.ChatCompletionMessageFunctionToolCallParam{
489								ID:   toolCallPart.ToolCallID,
490								Type: "function",
491								Function: openai.ChatCompletionMessageFunctionToolCallFunctionParam{
492									Name:      toolCallPart.ToolName,
493									Arguments: toolCallPart.Input,
494								},
495							},
496						})
497				}
498			}
499			messages = append(messages, openai.ChatCompletionMessageParamUnion{
500				OfAssistant: &assistantMsg,
501			})
502		case fantasy.MessageRoleTool:
503			for _, c := range msg.Content {
504				if c.GetType() != fantasy.ContentTypeToolResult {
505					warnings = append(warnings, fantasy.CallWarning{
506						Type:    fantasy.CallWarningTypeOther,
507						Message: "tool message can only have tool result content",
508					})
509					continue
510				}
511
512				toolResultPart, ok := fantasy.AsContentType[fantasy.ToolResultPart](c)
513				if !ok {
514					warnings = append(warnings, fantasy.CallWarning{
515						Type:    fantasy.CallWarningTypeOther,
516						Message: "tool message result part does not have the right type",
517					})
518					continue
519				}
520
521				switch toolResultPart.Output.GetType() {
522				case fantasy.ToolResultContentTypeText:
523					output, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentText](toolResultPart.Output)
524					if !ok {
525						warnings = append(warnings, fantasy.CallWarning{
526							Type:    fantasy.CallWarningTypeOther,
527							Message: "tool result output does not have the right type",
528						})
529						continue
530					}
531					messages = append(messages, openai.ToolMessage(output.Text, toolResultPart.ToolCallID))
532				case fantasy.ToolResultContentTypeError:
533					// TODO: check if better handling is needed
534					output, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentError](toolResultPart.Output)
535					if !ok {
536						warnings = append(warnings, fantasy.CallWarning{
537							Type:    fantasy.CallWarningTypeOther,
538							Message: "tool result output does not have the right type",
539						})
540						continue
541					}
542					messages = append(messages, openai.ToolMessage(output.Error.Error(), toolResultPart.ToolCallID))
543				}
544			}
545		}
546	}
547	return messages, warnings
548}