language_model_hooks.go

package openai

import (
	"encoding/base64"
	"fmt"
	"strings"

	"charm.land/fantasy"
	"github.com/openai/openai-go/v2"
	"github.com/openai/openai-go/v2/packages/param"
	"github.com/openai/openai-go/v2/shared"
)

// LanguageModelPrepareCallFunc is a function that prepares the call for the language model.
type LanguageModelPrepareCallFunc = func(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error)

// LanguageModelMapFinishReasonFunc is a function that maps the finish reason for the language model.
type LanguageModelMapFinishReasonFunc = func(finishReason string) fantasy.FinishReason

// LanguageModelUsageFunc is a function that calculates usage for the language model.
type LanguageModelUsageFunc = func(response openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData)

// LanguageModelExtraContentFunc is a function that adds extra content for the language model.
type LanguageModelExtraContentFunc = func(choice openai.ChatCompletionChoice) []fantasy.Content

// LanguageModelStreamExtraFunc is a function that handles provider-specific extra stream chunk processing for the language model.
type LanguageModelStreamExtraFunc = func(chunk openai.ChatCompletionChunk, yield func(fantasy.StreamPart) bool, ctx map[string]any) (map[string]any, bool)

// LanguageModelStreamUsageFunc is a function that calculates stream usage for the language model.
type LanguageModelStreamUsageFunc = func(chunk openai.ChatCompletionChunk, ctx map[string]any, metadata fantasy.ProviderMetadata) (fantasy.Usage, fantasy.ProviderMetadata)

// LanguageModelStreamProviderMetadataFunc is a function that handles stream provider metadata for the language model.
type LanguageModelStreamProviderMetadataFunc = func(choice openai.ChatCompletionChoice, metadata fantasy.ProviderMetadata) fantasy.ProviderMetadata

// LanguageModelToPromptFunc is a function that converts fantasy prompts to OpenAI SDK messages.
type LanguageModelToPromptFunc = func(prompt fantasy.Prompt, provider, model string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning)

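// Illustrative compile-time checks (a sketch for readers, not required by the
// package): the default implementations defined below satisfy the hook types
// above, so callers can use them directly, wrap them, or swap in their own
// functions with the same signatures.
var (
	_ LanguageModelPrepareCallFunc            = DefaultPrepareCallFunc
	_ LanguageModelMapFinishReasonFunc        = DefaultMapFinishReasonFunc
	_ LanguageModelUsageFunc                  = DefaultUsageFunc
	_ LanguageModelStreamUsageFunc            = DefaultStreamUsageFunc
	_ LanguageModelStreamProviderMetadataFunc = DefaultStreamProviderMetadataFunc
	_ LanguageModelToPromptFunc               = DefaultToPrompt
)
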
// DefaultPrepareCallFunc is the default implementation for preparing a call to the language model.
func DefaultPrepareCallFunc(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error) {
	if call.ProviderOptions == nil {
		return nil, nil
	}
	var warnings []fantasy.CallWarning
	providerOptions := &ProviderOptions{}
	if v, ok := call.ProviderOptions[Name]; ok {
		providerOptions, ok = v.(*ProviderOptions)
		if !ok {
			return nil, fantasy.NewInvalidArgumentError("providerOptions", "openai provider options should be *openai.ProviderOptions", nil)
		}
	}

	if providerOptions.LogitBias != nil {
		params.LogitBias = providerOptions.LogitBias
	}
	// When both are set, only TopLogProbs is forwarded.
	if providerOptions.LogProbs != nil && providerOptions.TopLogProbs != nil {
		providerOptions.LogProbs = nil
	}
	if providerOptions.LogProbs != nil {
		params.Logprobs = param.NewOpt(*providerOptions.LogProbs)
	}
	if providerOptions.TopLogProbs != nil {
		params.TopLogprobs = param.NewOpt(*providerOptions.TopLogProbs)
	}
	if providerOptions.User != nil {
		params.User = param.NewOpt(*providerOptions.User)
	}
	if providerOptions.ParallelToolCalls != nil {
		params.ParallelToolCalls = param.NewOpt(*providerOptions.ParallelToolCalls)
	}
	if providerOptions.MaxCompletionTokens != nil {
		params.MaxCompletionTokens = param.NewOpt(*providerOptions.MaxCompletionTokens)
	}

	if providerOptions.TextVerbosity != nil {
		params.Verbosity = openai.ChatCompletionNewParamsVerbosity(*providerOptions.TextVerbosity)
	}
	if providerOptions.Prediction != nil {
		// Convert map[string]any to ChatCompletionPredictionContentParam
		if content, ok := providerOptions.Prediction["content"]; ok {
			if contentStr, ok := content.(string); ok {
				params.Prediction = openai.ChatCompletionPredictionContentParam{
					Content: openai.ChatCompletionPredictionContentContentUnionParam{
						OfString: param.NewOpt(contentStr),
					},
				}
			}
		}
	}
	if providerOptions.Store != nil {
		params.Store = param.NewOpt(*providerOptions.Store)
	}
	if providerOptions.Metadata != nil {
		// Convert map[string]any to map[string]string, dropping non-string values
		metadata := make(map[string]string)
		for k, v := range providerOptions.Metadata {
			if str, ok := v.(string); ok {
				metadata[k] = str
			}
		}
		params.Metadata = metadata
	}
	if providerOptions.PromptCacheKey != nil {
		params.PromptCacheKey = param.NewOpt(*providerOptions.PromptCacheKey)
	}
	if providerOptions.SafetyIdentifier != nil {
		params.SafetyIdentifier = param.NewOpt(*providerOptions.SafetyIdentifier)
	}
	if providerOptions.ServiceTier != nil {
		params.ServiceTier = openai.ChatCompletionNewParamsServiceTier(*providerOptions.ServiceTier)
	}

	if providerOptions.ReasoningEffort != nil {
		switch *providerOptions.ReasoningEffort {
		case ReasoningEffortMinimal:
			params.ReasoningEffort = shared.ReasoningEffortMinimal
		case ReasoningEffortLow:
			params.ReasoningEffort = shared.ReasoningEffortLow
		case ReasoningEffortMedium:
			params.ReasoningEffort = shared.ReasoningEffortMedium
		case ReasoningEffortHigh:
			params.ReasoningEffort = shared.ReasoningEffortHigh
		default:
			return nil, fmt.Errorf("reasoning effort `%s` not supported", *providerOptions.ReasoningEffort)
		}
	}

	if isReasoningModel(model.Model()) {
		if providerOptions.LogitBias != nil {
			params.LogitBias = nil
			warnings = append(warnings, fantasy.CallWarning{
				Type:    fantasy.CallWarningTypeUnsupportedSetting,
				Setting: "LogitBias",
				Message: "LogitBias is not supported for reasoning models",
			})
		}
		if providerOptions.LogProbs != nil {
			params.Logprobs = param.Opt[bool]{}
			warnings = append(warnings, fantasy.CallWarning{
				Type:    fantasy.CallWarningTypeUnsupportedSetting,
				Setting: "Logprobs",
				Message: "Logprobs is not supported for reasoning models",
			})
		}
		if providerOptions.TopLogProbs != nil {
			params.TopLogprobs = param.Opt[int64]{}
			warnings = append(warnings, fantasy.CallWarning{
				Type:    fantasy.CallWarningTypeUnsupportedSetting,
				Setting: "TopLogprobs",
				Message: "TopLogprobs is not supported for reasoning models",
			})
		}
	}

	// Handle service tier validation
	if providerOptions.ServiceTier != nil {
		serviceTier := *providerOptions.ServiceTier
		if serviceTier == "flex" && !supportsFlexProcessing(model.Model()) {
			params.ServiceTier = ""
			warnings = append(warnings, fantasy.CallWarning{
				Type:    fantasy.CallWarningTypeUnsupportedSetting,
				Setting: "ServiceTier",
				Details: "flex processing is only available for o3, o4-mini, and gpt-5 models",
			})
		} else if serviceTier == "priority" && !supportsPriorityProcessing(model.Model()) {
			params.ServiceTier = ""
			warnings = append(warnings, fantasy.CallWarning{
				Type:    fantasy.CallWarningTypeUnsupportedSetting,
				Setting: "ServiceTier",
				Details: "priority processing is only available for supported models (gpt-4, gpt-5, gpt-5-mini, o3, o4-mini) and requires Enterprise access. gpt-5-nano is not supported",
			})
		}
	}
	return warnings, nil
}

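// examplePrepareCallWithUserTag is an illustrative sketch of a custom
// prepare-call hook: it delegates to DefaultPrepareCallFunc and then stamps a
// static end-user identifier on every request. The function name and the
// "example-user" value are placeholders for demonstration only.
func examplePrepareCallWithUserTag(model fantasy.LanguageModel, params *openai.ChatCompletionNewParams, call fantasy.Call) ([]fantasy.CallWarning, error) {
	warnings, err := DefaultPrepareCallFunc(model, params, call)
	if err != nil {
		return nil, err
	}
	// Overwrite whatever the default set for the end-user identifier.
	params.User = param.NewOpt("example-user")
	return warnings, nil
}
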
// DefaultMapFinishReasonFunc is the default implementation for mapping finish reasons.
func DefaultMapFinishReasonFunc(finishReason string) fantasy.FinishReason {
	switch finishReason {
	case "stop":
		return fantasy.FinishReasonStop
	case "length":
		return fantasy.FinishReasonLength
	case "content_filter":
		return fantasy.FinishReasonContentFilter
	case "function_call", "tool_calls":
		return fantasy.FinishReasonToolCalls
	default:
		return fantasy.FinishReasonUnknown
	}
}

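// exampleMapFinishReason is an illustrative sketch of a custom finish-reason
// hook for an OpenAI-compatible backend that emits a non-standard value; it
// handles that value and falls back to the default mapping otherwise. The
// "max_output_tokens" string is a hypothetical example, not a known API value.
func exampleMapFinishReason(finishReason string) fantasy.FinishReason {
	if finishReason == "max_output_tokens" {
		return fantasy.FinishReasonLength
	}
	return DefaultMapFinishReasonFunc(finishReason)
}
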
// DefaultUsageFunc is the default implementation for calculating usage.
func DefaultUsageFunc(response openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData) {
	completionTokenDetails := response.Usage.CompletionTokensDetails
	promptTokenDetails := response.Usage.PromptTokensDetails

	// Build provider metadata
	providerMetadata := &ProviderMetadata{}

	// Add logprobs if available
	if len(response.Choices) > 0 && len(response.Choices[0].Logprobs.Content) > 0 {
		providerMetadata.Logprobs = response.Choices[0].Logprobs.Content
	}

	// Add prediction tokens if available
	if completionTokenDetails.AcceptedPredictionTokens > 0 {
		providerMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
	}
	if completionTokenDetails.RejectedPredictionTokens > 0 {
		providerMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
	}
	return fantasy.Usage{
		InputTokens:     response.Usage.PromptTokens,
		OutputTokens:    response.Usage.CompletionTokens,
		TotalTokens:     response.Usage.TotalTokens,
		ReasoningTokens: completionTokenDetails.ReasoningTokens,
		CacheReadTokens: promptTokenDetails.CachedTokens,
	}, providerMetadata
}

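// exampleUsageWithReasoningAsOutput is an illustrative sketch of a custom
// usage hook. It wraps DefaultUsageFunc and folds reasoning tokens into the
// output count; whether that accounting is appropriate depends on the caller
// and is assumed here purely for demonstration.
func exampleUsageWithReasoningAsOutput(response openai.ChatCompletion) (fantasy.Usage, fantasy.ProviderOptionsData) {
	usage, metadata := DefaultUsageFunc(response)
	// Assumption for the example: treat reasoning tokens as billable output.
	usage.OutputTokens += usage.ReasoningTokens
	return usage, metadata
}
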
// DefaultStreamUsageFunc is the default implementation for calculating stream usage.
func DefaultStreamUsageFunc(chunk openai.ChatCompletionChunk, _ map[string]any, metadata fantasy.ProviderMetadata) (fantasy.Usage, fantasy.ProviderMetadata) {
	if chunk.Usage.TotalTokens == 0 {
		return fantasy.Usage{}, nil
	}
	streamProviderMetadata := &ProviderMetadata{}
	if metadata != nil {
		if providerMetadata, ok := metadata[Name]; ok {
			converted, ok := providerMetadata.(*ProviderMetadata)
			if ok {
				streamProviderMetadata = converted
			}
		}
	}
	// Read usage from the chunk directly because the accumulator does not
	// include prompt token details.
	completionTokenDetails := chunk.Usage.CompletionTokensDetails
	promptTokenDetails := chunk.Usage.PromptTokensDetails
	usage := fantasy.Usage{
		InputTokens:     chunk.Usage.PromptTokens,
		OutputTokens:    chunk.Usage.CompletionTokens,
		TotalTokens:     chunk.Usage.TotalTokens,
		ReasoningTokens: completionTokenDetails.ReasoningTokens,
		CacheReadTokens: promptTokenDetails.CachedTokens,
	}

	// Add prediction tokens if available
	if completionTokenDetails.AcceptedPredictionTokens > 0 {
		streamProviderMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
	}
	if completionTokenDetails.RejectedPredictionTokens > 0 {
		streamProviderMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
	}

	return usage, fantasy.ProviderMetadata{
		Name: streamProviderMetadata,
	}
}

// DefaultStreamProviderMetadataFunc is the default implementation for handling stream provider metadata.
func DefaultStreamProviderMetadataFunc(choice openai.ChatCompletionChoice, metadata fantasy.ProviderMetadata) fantasy.ProviderMetadata {
	if metadata == nil {
		metadata = fantasy.ProviderMetadata{}
	}
	streamProviderMetadata, ok := metadata[Name]
	if !ok {
		streamProviderMetadata = &ProviderMetadata{}
	}
	if converted, ok := streamProviderMetadata.(*ProviderMetadata); ok {
		converted.Logprobs = choice.Logprobs.Content
		metadata[Name] = converted
	}
	return metadata
}

// DefaultToPrompt converts a fantasy prompt to OpenAI format with default handling.
func DefaultToPrompt(prompt fantasy.Prompt, _, _ string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
	var messages []openai.ChatCompletionMessageParamUnion
	var warnings []fantasy.CallWarning
	for _, msg := range prompt {
		switch msg.Role {
		case fantasy.MessageRoleSystem:
			var systemPromptParts []string
			for _, c := range msg.Content {
				if c.GetType() != fantasy.ContentTypeText {
					warnings = append(warnings, fantasy.CallWarning{
						Type:    fantasy.CallWarningTypeOther,
						Message: "system prompt can only have text content",
					})
					continue
				}
				textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
				if !ok {
					warnings = append(warnings, fantasy.CallWarning{
						Type:    fantasy.CallWarningTypeOther,
						Message: "system prompt text part does not have the right type",
					})
					continue
				}
				if text := textPart.Text; strings.TrimSpace(text) != "" {
					systemPromptParts = append(systemPromptParts, text)
				}
			}
			if len(systemPromptParts) == 0 {
				warnings = append(warnings, fantasy.CallWarning{
					Type:    fantasy.CallWarningTypeOther,
					Message: "system prompt has no text parts",
				})
				continue
			}
			messages = append(messages, openai.SystemMessage(strings.Join(systemPromptParts, "\n")))
		case fantasy.MessageRoleUser:
			// Simple user message: just text content.
			if len(msg.Content) == 1 && msg.Content[0].GetType() == fantasy.ContentTypeText {
				textPart, ok := fantasy.AsContentType[fantasy.TextPart](msg.Content[0])
				if !ok {
					warnings = append(warnings, fantasy.CallWarning{
						Type:    fantasy.CallWarningTypeOther,
						Message: "user message text part does not have the right type",
					})
					continue
				}
				messages = append(messages, openai.UserMessage(textPart.Text))
				continue
			}
			// Text content plus attachments. For now only images, WAV/MP3 audio,
			// and PDFs are supported.
			// TODO: add the supported media types to the language model so we
			// can use that to validate the data here.
			var content []openai.ChatCompletionContentPartUnionParam
			for _, c := range msg.Content {
				switch c.GetType() {
				case fantasy.ContentTypeText:
					textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
					if !ok {
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: "user message text part does not have the right type",
						})
						continue
					}
					content = append(content, openai.ChatCompletionContentPartUnionParam{
						OfText: &openai.ChatCompletionContentPartTextParam{
							Text: textPart.Text,
						},
					})
				case fantasy.ContentTypeFile:
					filePart, ok := fantasy.AsContentType[fantasy.FilePart](c)
					if !ok {
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: "user message file part does not have the right type",
						})
						continue
					}

					switch {
					case strings.HasPrefix(filePart.MediaType, "image/"):
						// Handle image files
						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
						data := "data:" + filePart.MediaType + ";base64," + base64Encoded
						imageURL := openai.ChatCompletionContentPartImageImageURLParam{URL: data}

						// Check for provider-specific options like image detail
						if providerOptions, ok := filePart.ProviderOptions[Name]; ok {
							if fileOptions, ok := providerOptions.(*ProviderFileOptions); ok {
								imageURL.Detail = fileOptions.ImageDetail
							}
						}

						imageBlock := openai.ChatCompletionContentPartImageParam{ImageURL: imageURL}
						content = append(content, openai.ChatCompletionContentPartUnionParam{OfImageURL: &imageBlock})

					case filePart.MediaType == "audio/wav":
						// Handle WAV audio files
						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
								Data:   base64Encoded,
								Format: "wav",
							},
						}
						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})

					case filePart.MediaType == "audio/mpeg" || filePart.MediaType == "audio/mp3":
						// Handle MP3 audio files
						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
								Data:   base64Encoded,
								Format: "mp3",
							},
						}
						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})

					case filePart.MediaType == "application/pdf":
						// Handle PDF files
						dataStr := string(filePart.Data)

						// Check if data looks like a file ID (starts with "file-")
						if strings.HasPrefix(dataStr, "file-") {
							fileBlock := openai.ChatCompletionContentPartFileParam{
								File: openai.ChatCompletionContentPartFileFileParam{
									FileID: param.NewOpt(dataStr),
								},
							}
							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
						} else {
							// Handle as base64 data
							base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
							data := "data:application/pdf;base64," + base64Encoded

							filename := filePart.Filename
							if filename == "" {
								// Generate a default filename based on the content index
								filename = fmt.Sprintf("part-%d.pdf", len(content))
							}

							fileBlock := openai.ChatCompletionContentPartFileParam{
								File: openai.ChatCompletionContentPartFileFileParam{
									Filename: param.NewOpt(filename),
									FileData: param.NewOpt(data),
								},
							}
							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
						}

					default:
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: fmt.Sprintf("file part media type %s not supported", filePart.MediaType),
						})
					}
				}
			}
			messages = append(messages, openai.UserMessage(content))
		case fantasy.MessageRoleAssistant:
			// Simple assistant message: just text content.
			if len(msg.Content) == 1 && msg.Content[0].GetType() == fantasy.ContentTypeText {
				textPart, ok := fantasy.AsContentType[fantasy.TextPart](msg.Content[0])
				if !ok {
					warnings = append(warnings, fantasy.CallWarning{
						Type:    fantasy.CallWarningTypeOther,
						Message: "assistant message text part does not have the right type",
					})
					continue
				}
				messages = append(messages, openai.AssistantMessage(textPart.Text))
				continue
			}
			assistantMsg := openai.ChatCompletionAssistantMessageParam{
				Role: "assistant",
			}
			for _, c := range msg.Content {
				switch c.GetType() {
				case fantasy.ContentTypeText:
					textPart, ok := fantasy.AsContentType[fantasy.TextPart](c)
					if !ok {
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: "assistant message text part does not have the right type",
						})
						continue
					}
					assistantMsg.Content = openai.ChatCompletionAssistantMessageParamContentUnion{
						OfString: param.NewOpt(textPart.Text),
					}
				case fantasy.ContentTypeToolCall:
					toolCallPart, ok := fantasy.AsContentType[fantasy.ToolCallPart](c)
					if !ok {
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: "assistant message tool call part does not have the right type",
						})
						continue
					}
					assistantMsg.ToolCalls = append(assistantMsg.ToolCalls,
						openai.ChatCompletionMessageToolCallUnionParam{
							OfFunction: &openai.ChatCompletionMessageFunctionToolCallParam{
								ID:   toolCallPart.ToolCallID,
								Type: "function",
								Function: openai.ChatCompletionMessageFunctionToolCallFunctionParam{
									Name:      toolCallPart.ToolName,
									Arguments: toolCallPart.Input,
								},
							},
						})
				}
			}
			messages = append(messages, openai.ChatCompletionMessageParamUnion{
				OfAssistant: &assistantMsg,
			})
		case fantasy.MessageRoleTool:
			for _, c := range msg.Content {
				if c.GetType() != fantasy.ContentTypeToolResult {
					warnings = append(warnings, fantasy.CallWarning{
						Type:    fantasy.CallWarningTypeOther,
						Message: "tool message can only have tool result content",
					})
					continue
				}

				toolResultPart, ok := fantasy.AsContentType[fantasy.ToolResultPart](c)
				if !ok {
					warnings = append(warnings, fantasy.CallWarning{
						Type:    fantasy.CallWarningTypeOther,
						Message: "tool message result part does not have the right type",
					})
					continue
				}

				switch toolResultPart.Output.GetType() {
				case fantasy.ToolResultContentTypeText:
					output, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentText](toolResultPart.Output)
					if !ok {
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: "tool result output does not have the right type",
						})
						continue
					}
					messages = append(messages, openai.ToolMessage(output.Text, toolResultPart.ToolCallID))
				case fantasy.ToolResultContentTypeError:
					// TODO: check if better handling is needed
					output, ok := fantasy.AsToolResultOutputType[fantasy.ToolResultOutputContentError](toolResultPart.Output)
					if !ok {
						warnings = append(warnings, fantasy.CallWarning{
							Type:    fantasy.CallWarningTypeOther,
							Message: "tool result output does not have the right type",
						})
						continue
					}
					messages = append(messages, openai.ToolMessage(output.Error.Error(), toolResultPart.ToolCallID))
				}
			}
		}
	}
	return messages, warnings
}
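
// exampleToPromptWithPreamble is an illustrative sketch of a custom prompt
// hook. It wraps DefaultToPrompt and prepends a system message; the preamble
// text below is an arbitrary placeholder, not something the default adds.
func exampleToPromptWithPreamble(prompt fantasy.Prompt, provider, model string) ([]openai.ChatCompletionMessageParamUnion, []fantasy.CallWarning) {
	messages, warnings := DefaultToPrompt(prompt, provider, model)
	preamble := openai.SystemMessage("Answer concisely.")
	return append([]openai.ChatCompletionMessageParamUnion{preamble}, messages...), warnings
}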