1package openai
  2
  3import (
  4	"context"
  5	"encoding/base64"
  6	"encoding/json"
  7	"errors"
  8	"fmt"
  9	"io"
 10	"strings"
 11
 12	"github.com/charmbracelet/fantasy/ai"
 13	xjson "github.com/charmbracelet/x/json"
 14	"github.com/google/uuid"
 15	"github.com/openai/openai-go/v2"
 16	"github.com/openai/openai-go/v2/packages/param"
 17	"github.com/openai/openai-go/v2/shared"
 18)
 19
// languageModel implements ai.LanguageModel on top of the OpenAI chat
// completions API.
type languageModel struct {
	provider        string // provider name reported by Provider()
	modelID         string // model identifier reported by Model()
	client          openai.Client
	prepareCallFunc PrepareLanguageModelCallFunc // hook to customize request params per call
}
 26
// LanguageModelOption configures a languageModel during construction.
type LanguageModelOption = func(*languageModel)

// WithPrepareLanguageModelCallFunc overrides the default hook that
// adjusts request parameters before each call.
func WithPrepareLanguageModelCallFunc(fn PrepareLanguageModelCallFunc) LanguageModelOption {
	return func(l *languageModel) {
		l.prepareCallFunc = fn
	}
}
 34
 35func newLanguageModel(modelID string, provider string, client openai.Client, opts ...LanguageModelOption) languageModel {
 36	model := languageModel{
 37		modelID:         modelID,
 38		provider:        provider,
 39		client:          client,
 40		prepareCallFunc: defaultPrepareLanguageModelCall,
 41	}
 42
 43	for _, o := range opts {
 44		o(&model)
 45	}
 46	return model
 47}
 48
// streamToolCall accumulates a tool call that arrives incrementally
// across streaming chunks, keyed by the delta's tool-call index.
type streamToolCall struct {
	id          string
	name        string
	arguments   string // JSON arguments, concatenated from deltas
	hasFinished bool   // set once complete JSON was emitted as a tool-call part
}
 55
// Model implements ai.LanguageModel. It returns the model identifier.
func (o languageModel) Model() string {
	return o.modelID
}
 60
// Provider implements ai.LanguageModel. It returns the provider name.
func (o languageModel) Provider() string {
	return o.provider
}
 65
 66func (o languageModel) prepareParams(call ai.Call) (*openai.ChatCompletionNewParams, []ai.CallWarning, error) {
 67	params := &openai.ChatCompletionNewParams{}
 68	messages, warnings := toPrompt(call.Prompt)
 69	if call.TopK != nil {
 70		warnings = append(warnings, ai.CallWarning{
 71			Type:    ai.CallWarningTypeUnsupportedSetting,
 72			Setting: "top_k",
 73		})
 74	}
 75	params.Messages = messages
 76	params.Model = o.modelID
 77
 78	if call.MaxOutputTokens != nil {
 79		params.MaxTokens = param.NewOpt(*call.MaxOutputTokens)
 80	}
 81	if call.Temperature != nil {
 82		params.Temperature = param.NewOpt(*call.Temperature)
 83	}
 84	if call.TopP != nil {
 85		params.TopP = param.NewOpt(*call.TopP)
 86	}
 87	if call.FrequencyPenalty != nil {
 88		params.FrequencyPenalty = param.NewOpt(*call.FrequencyPenalty)
 89	}
 90	if call.PresencePenalty != nil {
 91		params.PresencePenalty = param.NewOpt(*call.PresencePenalty)
 92	}
 93
 94	if isReasoningModel(o.modelID) {
 95		// remove unsupported settings for reasoning models
 96		// see https://platform.openai.com/docs/guides/reasoning#limitations
 97		if call.Temperature != nil {
 98			params.Temperature = param.Opt[float64]{}
 99			warnings = append(warnings, ai.CallWarning{
100				Type:    ai.CallWarningTypeUnsupportedSetting,
101				Setting: "temperature",
102				Details: "temperature is not supported for reasoning models",
103			})
104		}
105		if call.TopP != nil {
106			params.TopP = param.Opt[float64]{}
107			warnings = append(warnings, ai.CallWarning{
108				Type:    ai.CallWarningTypeUnsupportedSetting,
109				Setting: "TopP",
110				Details: "TopP is not supported for reasoning models",
111			})
112		}
113		if call.FrequencyPenalty != nil {
114			params.FrequencyPenalty = param.Opt[float64]{}
115			warnings = append(warnings, ai.CallWarning{
116				Type:    ai.CallWarningTypeUnsupportedSetting,
117				Setting: "FrequencyPenalty",
118				Details: "FrequencyPenalty is not supported for reasoning models",
119			})
120		}
121		if call.PresencePenalty != nil {
122			params.PresencePenalty = param.Opt[float64]{}
123			warnings = append(warnings, ai.CallWarning{
124				Type:    ai.CallWarningTypeUnsupportedSetting,
125				Setting: "PresencePenalty",
126				Details: "PresencePenalty is not supported for reasoning models",
127			})
128		}
129
130		// reasoning models use max_completion_tokens instead of max_tokens
131		if call.MaxOutputTokens != nil {
132			if !params.MaxCompletionTokens.Valid() {
133				params.MaxCompletionTokens = param.NewOpt(*call.MaxOutputTokens)
134			}
135			params.MaxTokens = param.Opt[int64]{}
136		}
137	}
138
139	// Handle search preview models
140	if isSearchPreviewModel(o.modelID) {
141		if call.Temperature != nil {
142			params.Temperature = param.Opt[float64]{}
143			warnings = append(warnings, ai.CallWarning{
144				Type:    ai.CallWarningTypeUnsupportedSetting,
145				Setting: "temperature",
146				Details: "temperature is not supported for the search preview models and has been removed.",
147			})
148		}
149	}
150
151	optionsWarnings, err := o.prepareCallFunc(o, params, call)
152	if err != nil {
153		return nil, nil, err
154	}
155
156	if len(optionsWarnings) > 0 {
157		warnings = append(warnings, optionsWarnings...)
158	}
159
160	if len(call.Tools) > 0 {
161		tools, toolChoice, toolWarnings := toOpenAiTools(call.Tools, call.ToolChoice)
162		params.Tools = tools
163		if toolChoice != nil {
164			params.ToolChoice = *toolChoice
165		}
166		warnings = append(warnings, toolWarnings...)
167	}
168	return params, warnings, nil
169}
170
171func (o languageModel) handleError(err error) error {
172	var apiErr *openai.Error
173	if errors.As(err, &apiErr) {
174		requestDump := apiErr.DumpRequest(true)
175		responseDump := apiErr.DumpResponse(true)
176		headers := map[string]string{}
177		for k, h := range apiErr.Response.Header {
178			v := h[len(h)-1]
179			headers[strings.ToLower(k)] = v
180		}
181		return ai.NewAPICallError(
182			apiErr.Message,
183			apiErr.Request.URL.String(),
184			string(requestDump),
185			apiErr.StatusCode,
186			headers,
187			string(responseDump),
188			apiErr,
189			false,
190		)
191	}
192	return err
193}
194
// Generate implements ai.LanguageModel. It performs a single
// (non-streaming) chat completion request and converts the first choice
// into an ai.Response containing text, tool calls, and URL-citation
// sources, plus usage and provider metadata.
func (o languageModel) Generate(ctx context.Context, call ai.Call) (*ai.Response, error) {
	params, warnings, err := o.prepareParams(call)
	if err != nil {
		return nil, err
	}
	response, err := o.client.Chat.Completions.New(ctx, *params)
	if err != nil {
		return nil, o.handleError(err)
	}

	if len(response.Choices) == 0 {
		return nil, errors.New("no response generated")
	}
	choice := response.Choices[0]
	// Capacity: one text part plus every tool call and annotation.
	content := make([]ai.Content, 0, 1+len(choice.Message.ToolCalls)+len(choice.Message.Annotations))
	text := choice.Message.Content
	if text != "" {
		content = append(content, ai.TextContent{
			Text: text,
		})
	}

	// Tool calls missing an ID get a generated one so callers can
	// always correlate results.
	for _, tc := range choice.Message.ToolCalls {
		toolCallID := tc.ID
		if toolCallID == "" {
			toolCallID = uuid.NewString()
		}
		content = append(content, ai.ToolCallContent{
			ProviderExecuted: false, // TODO: update when handling other tools
			ToolCallID:       toolCallID,
			ToolName:         tc.Function.Name,
			Input:            tc.Function.Arguments,
		})
	}
	// Handle annotations/citations (only url_citation is mapped).
	for _, annotation := range choice.Message.Annotations {
		if annotation.Type == "url_citation" {
			content = append(content, ai.SourceContent{
				SourceType: ai.SourceTypeURL,
				ID:         uuid.NewString(),
				URL:        annotation.URLCitation.URL,
				Title:      annotation.URLCitation.Title,
			})
		}
	}

	completionTokenDetails := response.Usage.CompletionTokensDetails
	promptTokenDetails := response.Usage.PromptTokensDetails

	// Build provider metadata
	providerMetadata := &ProviderMetadata{}
	// Add logprobs if available
	if len(choice.Logprobs.Content) > 0 {
		providerMetadata.Logprobs = choice.Logprobs.Content
	}

	// Add prediction tokens if available.
	// NOTE(review): the outer check is redundant with the inner ones,
	// but harmless.
	if completionTokenDetails.AcceptedPredictionTokens > 0 || completionTokenDetails.RejectedPredictionTokens > 0 {
		if completionTokenDetails.AcceptedPredictionTokens > 0 {
			providerMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
		}
		if completionTokenDetails.RejectedPredictionTokens > 0 {
			providerMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
		}
	}

	return &ai.Response{
		Content: content,
		Usage: ai.Usage{
			InputTokens:     response.Usage.PromptTokens,
			OutputTokens:    response.Usage.CompletionTokens,
			TotalTokens:     response.Usage.TotalTokens,
			ReasoningTokens: completionTokenDetails.ReasoningTokens,
			CacheReadTokens: promptTokenDetails.CachedTokens,
		},
		FinishReason: mapOpenAiFinishReason(choice.FinishReason),
		ProviderMetadata: ai.ProviderMetadata{
			Name: providerMetadata,
		},
		Warnings: warnings,
	}, nil
}
278
279// Stream implements ai.LanguageModel.
280func (o languageModel) Stream(ctx context.Context, call ai.Call) (ai.StreamResponse, error) {
281	params, warnings, err := o.prepareParams(call)
282	if err != nil {
283		return nil, err
284	}
285
286	params.StreamOptions = openai.ChatCompletionStreamOptionsParam{
287		IncludeUsage: openai.Bool(true),
288	}
289
290	stream := o.client.Chat.Completions.NewStreaming(ctx, *params)
291	isActiveText := false
292	toolCalls := make(map[int64]streamToolCall)
293
294	// Build provider metadata for streaming
295	streamProviderMetadata := &ProviderMetadata{}
296	acc := openai.ChatCompletionAccumulator{}
297	var usage ai.Usage
298	return func(yield func(ai.StreamPart) bool) {
299		if len(warnings) > 0 {
300			if !yield(ai.StreamPart{
301				Type:     ai.StreamPartTypeWarnings,
302				Warnings: warnings,
303			}) {
304				return
305			}
306		}
307		for stream.Next() {
308			chunk := stream.Current()
309			acc.AddChunk(chunk)
310			if chunk.Usage.TotalTokens > 0 {
311				// we do this here because the acc does not add prompt details
312				completionTokenDetails := chunk.Usage.CompletionTokensDetails
313				promptTokenDetails := chunk.Usage.PromptTokensDetails
314				usage = ai.Usage{
315					InputTokens:     chunk.Usage.PromptTokens,
316					OutputTokens:    chunk.Usage.CompletionTokens,
317					TotalTokens:     chunk.Usage.TotalTokens,
318					ReasoningTokens: completionTokenDetails.ReasoningTokens,
319					CacheReadTokens: promptTokenDetails.CachedTokens,
320				}
321
322				// Add prediction tokens if available
323				if completionTokenDetails.AcceptedPredictionTokens > 0 || completionTokenDetails.RejectedPredictionTokens > 0 {
324					if completionTokenDetails.AcceptedPredictionTokens > 0 {
325						streamProviderMetadata.AcceptedPredictionTokens = completionTokenDetails.AcceptedPredictionTokens
326					}
327					if completionTokenDetails.RejectedPredictionTokens > 0 {
328						streamProviderMetadata.RejectedPredictionTokens = completionTokenDetails.RejectedPredictionTokens
329					}
330				}
331			}
332			if len(chunk.Choices) == 0 {
333				continue
334			}
335			for _, choice := range chunk.Choices {
336				switch {
337				case choice.Delta.Content != "":
338					if !isActiveText {
339						isActiveText = true
340						if !yield(ai.StreamPart{
341							Type: ai.StreamPartTypeTextStart,
342							ID:   "0",
343						}) {
344							return
345						}
346					}
347					if !yield(ai.StreamPart{
348						Type:  ai.StreamPartTypeTextDelta,
349						ID:    "0",
350						Delta: choice.Delta.Content,
351					}) {
352						return
353					}
354				case len(choice.Delta.ToolCalls) > 0:
355					if isActiveText {
356						isActiveText = false
357						if !yield(ai.StreamPart{
358							Type: ai.StreamPartTypeTextEnd,
359							ID:   "0",
360						}) {
361							return
362						}
363					}
364
365					for _, toolCallDelta := range choice.Delta.ToolCalls {
366						if existingToolCall, ok := toolCalls[toolCallDelta.Index]; ok {
367							if existingToolCall.hasFinished {
368								continue
369							}
370							if toolCallDelta.Function.Arguments != "" {
371								existingToolCall.arguments += toolCallDelta.Function.Arguments
372							}
373							if !yield(ai.StreamPart{
374								Type:  ai.StreamPartTypeToolInputDelta,
375								ID:    existingToolCall.id,
376								Delta: toolCallDelta.Function.Arguments,
377							}) {
378								return
379							}
380							toolCalls[toolCallDelta.Index] = existingToolCall
381							if xjson.IsValid(existingToolCall.arguments) {
382								if !yield(ai.StreamPart{
383									Type: ai.StreamPartTypeToolInputEnd,
384									ID:   existingToolCall.id,
385								}) {
386									return
387								}
388
389								if !yield(ai.StreamPart{
390									Type:          ai.StreamPartTypeToolCall,
391									ID:            existingToolCall.id,
392									ToolCallName:  existingToolCall.name,
393									ToolCallInput: existingToolCall.arguments,
394								}) {
395									return
396								}
397								existingToolCall.hasFinished = true
398								toolCalls[toolCallDelta.Index] = existingToolCall
399							}
400						} else {
401							// Does not exist
402							var err error
403							if toolCallDelta.Type != "function" {
404								err = ai.NewInvalidResponseDataError(toolCallDelta, "Expected 'function' type.")
405							}
406							if toolCallDelta.ID == "" {
407								err = ai.NewInvalidResponseDataError(toolCallDelta, "Expected 'id' to be a string.")
408							}
409							if toolCallDelta.Function.Name == "" {
410								err = ai.NewInvalidResponseDataError(toolCallDelta, "Expected 'function.name' to be a string.")
411							}
412							if err != nil {
413								yield(ai.StreamPart{
414									Type:  ai.StreamPartTypeError,
415									Error: o.handleError(stream.Err()),
416								})
417								return
418							}
419
420							if !yield(ai.StreamPart{
421								Type:         ai.StreamPartTypeToolInputStart,
422								ID:           toolCallDelta.ID,
423								ToolCallName: toolCallDelta.Function.Name,
424							}) {
425								return
426							}
427							toolCalls[toolCallDelta.Index] = streamToolCall{
428								id:        toolCallDelta.ID,
429								name:      toolCallDelta.Function.Name,
430								arguments: toolCallDelta.Function.Arguments,
431							}
432
433							exTc := toolCalls[toolCallDelta.Index]
434							if exTc.arguments != "" {
435								if !yield(ai.StreamPart{
436									Type:  ai.StreamPartTypeToolInputDelta,
437									ID:    exTc.id,
438									Delta: exTc.arguments,
439								}) {
440									return
441								}
442								if xjson.IsValid(toolCalls[toolCallDelta.Index].arguments) {
443									if !yield(ai.StreamPart{
444										Type: ai.StreamPartTypeToolInputEnd,
445										ID:   toolCallDelta.ID,
446									}) {
447										return
448									}
449
450									if !yield(ai.StreamPart{
451										Type:          ai.StreamPartTypeToolCall,
452										ID:            exTc.id,
453										ToolCallName:  exTc.name,
454										ToolCallInput: exTc.arguments,
455									}) {
456										return
457									}
458									exTc.hasFinished = true
459									toolCalls[toolCallDelta.Index] = exTc
460								}
461							}
462							continue
463						}
464					}
465				}
466			}
467
468			// Check for annotations in the delta's raw JSON
469			for _, choice := range chunk.Choices {
470				if annotations := parseAnnotationsFromDelta(choice.Delta); len(annotations) > 0 {
471					for _, annotation := range annotations {
472						if annotation.Type == "url_citation" {
473							if !yield(ai.StreamPart{
474								Type:       ai.StreamPartTypeSource,
475								ID:         uuid.NewString(),
476								SourceType: ai.SourceTypeURL,
477								URL:        annotation.URLCitation.URL,
478								Title:      annotation.URLCitation.Title,
479							}) {
480								return
481							}
482						}
483					}
484				}
485			}
486		}
487		err := stream.Err()
488		if err == nil || errors.Is(err, io.EOF) {
489			// finished
490			if isActiveText {
491				isActiveText = false
492				if !yield(ai.StreamPart{
493					Type: ai.StreamPartTypeTextEnd,
494					ID:   "0",
495				}) {
496					return
497				}
498			}
499
500			// Add logprobs if available
501			if len(acc.Choices) > 0 && len(acc.Choices[0].Logprobs.Content) > 0 {
502				streamProviderMetadata.Logprobs = acc.Choices[0].Logprobs.Content
503			}
504
505			// Handle annotations/citations from accumulated response
506			if len(acc.Choices) > 0 {
507				for _, annotation := range acc.Choices[0].Message.Annotations {
508					if annotation.Type == "url_citation" {
509						if !yield(ai.StreamPart{
510							Type:       ai.StreamPartTypeSource,
511							ID:         acc.ID,
512							SourceType: ai.SourceTypeURL,
513							URL:        annotation.URLCitation.URL,
514							Title:      annotation.URLCitation.Title,
515						}) {
516							return
517						}
518					}
519				}
520			}
521
522			finishReason := mapOpenAiFinishReason(acc.Choices[0].FinishReason)
523			yield(ai.StreamPart{
524				Type:         ai.StreamPartTypeFinish,
525				Usage:        usage,
526				FinishReason: finishReason,
527				ProviderMetadata: ai.ProviderMetadata{
528					Name: streamProviderMetadata,
529				},
530			})
531			return
532		} else {
533			yield(ai.StreamPart{
534				Type:  ai.StreamPartTypeError,
535				Error: o.handleError(err),
536			})
537			return
538		}
539	}, nil
540}
541
542func mapOpenAiFinishReason(finishReason string) ai.FinishReason {
543	switch finishReason {
544	case "stop":
545		return ai.FinishReasonStop
546	case "length":
547		return ai.FinishReasonLength
548	case "content_filter":
549		return ai.FinishReasonContentFilter
550	case "function_call", "tool_calls":
551		return ai.FinishReasonToolCalls
552	default:
553		return ai.FinishReasonUnknown
554	}
555}
556
// isReasoningModel reports whether the model is an OpenAI reasoning
// model (o-series or the gpt-5 family), which rejects sampling settings
// and uses max_completion_tokens (see prepareParams). The gpt-5-chat
// models are conventional chat models and are excluded; the previous
// "gpt-5-chat" check was redundant (subsumed by the "gpt-5" prefix)
// rather than the intended exclusion.
func isReasoningModel(modelID string) bool {
	if strings.HasPrefix(modelID, "gpt-5-chat") {
		return false
	}
	return strings.HasPrefix(modelID, "o") || strings.HasPrefix(modelID, "gpt-5")
}
560
// isSearchPreviewModel reports whether the model ID names one of the
// search-preview variants, which carry "search-preview" anywhere in the
// identifier.
func isSearchPreviewModel(id string) bool {
	return strings.Contains(id, "search-preview")
}
564
// supportsFlexProcessing reports whether the model can be used with the
// "flex" service tier.
func supportsFlexProcessing(modelID string) bool {
	for _, prefix := range []string{"o3", "o4-mini", "gpt-5"} {
		if strings.HasPrefix(modelID, prefix) {
			return true
		}
	}
	return false
}
568
// supportsPriorityProcessing reports whether the model can be used with
// the "priority" service tier. The explicit "gpt-5-mini" check in the
// original was redundant — it is already covered by the "gpt-5" prefix —
// so the list is reduced to the distinct prefixes.
func supportsPriorityProcessing(modelID string) bool {
	for _, prefix := range []string{"gpt-4", "gpt-5", "o3", "o4-mini"} {
		if strings.HasPrefix(modelID, prefix) {
			return true
		}
	}
	return false
}
574
575func toOpenAiTools(tools []ai.Tool, toolChoice *ai.ToolChoice) (openAiTools []openai.ChatCompletionToolUnionParam, openAiToolChoice *openai.ChatCompletionToolChoiceOptionUnionParam, warnings []ai.CallWarning) {
576	for _, tool := range tools {
577		if tool.GetType() == ai.ToolTypeFunction {
578			ft, ok := tool.(ai.FunctionTool)
579			if !ok {
580				continue
581			}
582			openAiTools = append(openAiTools, openai.ChatCompletionToolUnionParam{
583				OfFunction: &openai.ChatCompletionFunctionToolParam{
584					Function: shared.FunctionDefinitionParam{
585						Name:        ft.Name,
586						Description: param.NewOpt(ft.Description),
587						Parameters:  openai.FunctionParameters(ft.InputSchema),
588						Strict:      param.NewOpt(false),
589					},
590					Type: "function",
591				},
592			})
593			continue
594		}
595
596		// TODO: handle provider tool calls
597		warnings = append(warnings, ai.CallWarning{
598			Type:    ai.CallWarningTypeUnsupportedTool,
599			Tool:    tool,
600			Message: "tool is not supported",
601		})
602	}
603	if toolChoice == nil {
604		return openAiTools, openAiToolChoice, warnings
605	}
606
607	switch *toolChoice {
608	case ai.ToolChoiceAuto:
609		openAiToolChoice = &openai.ChatCompletionToolChoiceOptionUnionParam{
610			OfAuto: param.NewOpt("auto"),
611		}
612	case ai.ToolChoiceNone:
613		openAiToolChoice = &openai.ChatCompletionToolChoiceOptionUnionParam{
614			OfAuto: param.NewOpt("none"),
615		}
616	default:
617		openAiToolChoice = &openai.ChatCompletionToolChoiceOptionUnionParam{
618			OfFunctionToolChoice: &openai.ChatCompletionNamedToolChoiceParam{
619				Type: "function",
620				Function: openai.ChatCompletionNamedToolChoiceFunctionParam{
621					Name: string(*toolChoice),
622				},
623			},
624		}
625	}
626	return openAiTools, openAiToolChoice, warnings
627}
628
// toPrompt converts the provider-agnostic prompt into OpenAI chat
// completion messages. Content that cannot be represented (non-text
// system parts, unsupported file media types, non-result tool content)
// is dropped and reported as a warning rather than failing the call.
func toPrompt(prompt ai.Prompt) ([]openai.ChatCompletionMessageParamUnion, []ai.CallWarning) {
	var messages []openai.ChatCompletionMessageParamUnion
	var warnings []ai.CallWarning
	for _, msg := range prompt {
		switch msg.Role {
		case ai.MessageRoleSystem:
			// All non-empty text parts are joined into one system message.
			var systemPromptParts []string
			for _, c := range msg.Content {
				if c.GetType() != ai.ContentTypeText {
					warnings = append(warnings, ai.CallWarning{
						Type:    ai.CallWarningTypeOther,
						Message: "system prompt can only have text content",
					})
					continue
				}
				textPart, ok := ai.AsContentType[ai.TextPart](c)
				if !ok {
					warnings = append(warnings, ai.CallWarning{
						Type:    ai.CallWarningTypeOther,
						Message: "system prompt text part does not have the right type",
					})
					continue
				}
				text := textPart.Text
				// Whitespace-only parts are ignored.
				if strings.TrimSpace(text) != "" {
					systemPromptParts = append(systemPromptParts, textPart.Text)
				}
			}
			if len(systemPromptParts) == 0 {
				warnings = append(warnings, ai.CallWarning{
					Type:    ai.CallWarningTypeOther,
					Message: "system prompt has no text parts",
				})
				continue
			}
			messages = append(messages, openai.SystemMessage(strings.Join(systemPromptParts, "\n")))
		case ai.MessageRoleUser:
			// Fast path: a single text part becomes a plain user message.
			if len(msg.Content) == 1 && msg.Content[0].GetType() == ai.ContentTypeText {
				textPart, ok := ai.AsContentType[ai.TextPart](msg.Content[0])
				if !ok {
					warnings = append(warnings, ai.CallWarning{
						Type:    ai.CallWarningTypeOther,
						Message: "user message text part does not have the right type",
					})
					continue
				}
				messages = append(messages, openai.UserMessage(textPart.Text))
				continue
			}
			// text content and attachments
			// for now we only support image content later we need to check
			// TODO: add the supported media types to the language model so we
			//  can use that to validate the data here.
			var content []openai.ChatCompletionContentPartUnionParam
			for _, c := range msg.Content {
				switch c.GetType() {
				case ai.ContentTypeText:
					textPart, ok := ai.AsContentType[ai.TextPart](c)
					if !ok {
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: "user message text part does not have the right type",
						})
						continue
					}
					content = append(content, openai.ChatCompletionContentPartUnionParam{
						OfText: &openai.ChatCompletionContentPartTextParam{
							Text: textPart.Text,
						},
					})
				case ai.ContentTypeFile:
					filePart, ok := ai.AsContentType[ai.FilePart](c)
					if !ok {
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: "user message file part does not have the right type",
						})
						continue
					}

					switch {
					case strings.HasPrefix(filePart.MediaType, "image/"):
						// Images are sent inline as data: URLs.
						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
						data := "data:" + filePart.MediaType + ";base64," + base64Encoded
						imageURL := openai.ChatCompletionContentPartImageImageURLParam{URL: data}

						// Check for provider-specific options like image detail
						if providerOptions, ok := filePart.ProviderOptions[Name]; ok {
							if detail, ok := providerOptions.(*ProviderFileOptions); ok {
								imageURL.Detail = detail.ImageDetail
							}
						}

						imageBlock := openai.ChatCompletionContentPartImageParam{ImageURL: imageURL}
						content = append(content, openai.ChatCompletionContentPartUnionParam{OfImageURL: &imageBlock})

					case filePart.MediaType == "audio/wav":
						// WAV audio is sent as base64 input_audio.
						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
								Data:   base64Encoded,
								Format: "wav",
							},
						}
						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})

					case filePart.MediaType == "audio/mpeg" || filePart.MediaType == "audio/mp3":
						// MP3 audio is sent as base64 input_audio.
						base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
						audioBlock := openai.ChatCompletionContentPartInputAudioParam{
							InputAudio: openai.ChatCompletionContentPartInputAudioInputAudioParam{
								Data:   base64Encoded,
								Format: "mp3",
							},
						}
						content = append(content, openai.ChatCompletionContentPartUnionParam{OfInputAudio: &audioBlock})

					case filePart.MediaType == "application/pdf":
						// PDFs may reference an uploaded file by ID or be
						// embedded as base64 data.
						dataStr := string(filePart.Data)

						// Check if data looks like a file ID (starts with "file-")
						if strings.HasPrefix(dataStr, "file-") {
							fileBlock := openai.ChatCompletionContentPartFileParam{
								File: openai.ChatCompletionContentPartFileFileParam{
									FileID: param.NewOpt(dataStr),
								},
							}
							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
						} else {
							// Handle as base64 data
							base64Encoded := base64.StdEncoding.EncodeToString(filePart.Data)
							data := "data:application/pdf;base64," + base64Encoded

							filename := filePart.Filename
							if filename == "" {
								// Generate default filename based on content index
								filename = fmt.Sprintf("part-%d.pdf", len(content))
							}

							fileBlock := openai.ChatCompletionContentPartFileParam{
								File: openai.ChatCompletionContentPartFileFileParam{
									Filename: param.NewOpt(filename),
									FileData: param.NewOpt(data),
								},
							}
							content = append(content, openai.ChatCompletionContentPartUnionParam{OfFile: &fileBlock})
						}

					default:
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: fmt.Sprintf("file part media type %s not supported", filePart.MediaType),
						})
					}
				}
			}
			messages = append(messages, openai.UserMessage(content))
		case ai.MessageRoleAssistant:
			// Fast path: a single text part becomes a plain assistant message.
			if len(msg.Content) == 1 && msg.Content[0].GetType() == ai.ContentTypeText {
				textPart, ok := ai.AsContentType[ai.TextPart](msg.Content[0])
				if !ok {
					warnings = append(warnings, ai.CallWarning{
						Type:    ai.CallWarningTypeOther,
						Message: "assistant message text part does not have the right type",
					})
					continue
				}
				messages = append(messages, openai.AssistantMessage(textPart.Text))
				continue
			}
			assistantMsg := openai.ChatCompletionAssistantMessageParam{
				Role: "assistant",
			}
			for _, c := range msg.Content {
				switch c.GetType() {
				case ai.ContentTypeText:
					textPart, ok := ai.AsContentType[ai.TextPart](c)
					if !ok {
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: "assistant message text part does not have the right type",
						})
						continue
					}
					// NOTE(review): each text part overwrites Content, so
					// only the last text part survives when a message has
					// several — confirm whether multiple text parts should
					// be joined instead.
					assistantMsg.Content = openai.ChatCompletionAssistantMessageParamContentUnion{
						OfString: param.NewOpt(textPart.Text),
					}
				case ai.ContentTypeToolCall:
					toolCallPart, ok := ai.AsContentType[ai.ToolCallPart](c)
					if !ok {
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: "assistant message tool part does not have the right type",
						})
						continue
					}
					assistantMsg.ToolCalls = append(assistantMsg.ToolCalls,
						openai.ChatCompletionMessageToolCallUnionParam{
							OfFunction: &openai.ChatCompletionMessageFunctionToolCallParam{
								ID:   toolCallPart.ToolCallID,
								Type: "function",
								Function: openai.ChatCompletionMessageFunctionToolCallFunctionParam{
									Name:      toolCallPart.ToolName,
									Arguments: toolCallPart.Input,
								},
							},
						})
				}
			}
			messages = append(messages, openai.ChatCompletionMessageParamUnion{
				OfAssistant: &assistantMsg,
			})
		case ai.MessageRoleTool:
			// Each tool-result part becomes its own tool message keyed by
			// the originating tool call ID.
			for _, c := range msg.Content {
				if c.GetType() != ai.ContentTypeToolResult {
					warnings = append(warnings, ai.CallWarning{
						Type:    ai.CallWarningTypeOther,
						Message: "tool message can only have tool result content",
					})
					continue
				}

				toolResultPart, ok := ai.AsContentType[ai.ToolResultPart](c)
				if !ok {
					warnings = append(warnings, ai.CallWarning{
						Type:    ai.CallWarningTypeOther,
						Message: "tool message result part does not have the right type",
					})
					continue
				}

				switch toolResultPart.Output.GetType() {
				case ai.ToolResultContentTypeText:
					output, ok := ai.AsToolResultOutputType[ai.ToolResultOutputContentText](toolResultPart.Output)
					if !ok {
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: "tool result output does not have the right type",
						})
						continue
					}
					messages = append(messages, openai.ToolMessage(output.Text, toolResultPart.ToolCallID))
				case ai.ToolResultContentTypeError:
					// TODO: check if better handling is needed
					output, ok := ai.AsToolResultOutputType[ai.ToolResultOutputContentError](toolResultPart.Output)
					if !ok {
						warnings = append(warnings, ai.CallWarning{
							Type:    ai.CallWarningTypeOther,
							Message: "tool result output does not have the right type",
						})
						continue
					}
					messages = append(messages, openai.ToolMessage(output.Error.Error(), toolResultPart.ToolCallID))
				}
			}
		}
	}
	return messages, warnings
}
893
894// parseAnnotationsFromDelta parses annotations from the raw JSON of a delta.
895func parseAnnotationsFromDelta(delta openai.ChatCompletionChunkChoiceDelta) []openai.ChatCompletionMessageAnnotation {
896	var annotations []openai.ChatCompletionMessageAnnotation
897
898	// Parse the raw JSON to extract annotations
899	var deltaData map[string]any
900	if err := json.Unmarshal([]byte(delta.RawJSON()), &deltaData); err != nil {
901		return annotations
902	}
903
904	// Check if annotations exist in the delta
905	if annotationsData, ok := deltaData["annotations"].([]any); ok {
906		for _, annotationData := range annotationsData {
907			if annotationMap, ok := annotationData.(map[string]any); ok {
908				if annotationType, ok := annotationMap["type"].(string); ok && annotationType == "url_citation" {
909					if urlCitationData, ok := annotationMap["url_citation"].(map[string]any); ok {
910						annotation := openai.ChatCompletionMessageAnnotation{
911							Type: "url_citation",
912							URLCitation: openai.ChatCompletionMessageAnnotationURLCitation{
913								URL:   urlCitationData["url"].(string),
914								Title: urlCitationData["title"].(string),
915							},
916						}
917						annotations = append(annotations, annotation)
918					}
919				}
920			}
921		}
922	}
923
924	return annotations
925}