1package oai
2
3import (
4 "cmp"
5 "context"
6 "encoding/json"
7 "errors"
8 "fmt"
9 "log/slog"
10 "math/rand/v2"
11 "net/http"
12 "strings"
13 "time"
14
15 "github.com/sashabaranov/go-openai"
16 "shelley.exe.dev/llm"
17)
18
// Defaults and provider endpoints for OpenAI-compatible chat APIs.
const (
	// DefaultMaxTokens caps MaxCompletionTokens when Service.MaxTokens is zero.
	DefaultMaxTokens = 8192

	// Base URLs of the OpenAI-compatible endpoints for each supported provider.
	OpenAIURL    = "https://api.openai.com/v1"
	FireworksURL = "https://api.fireworks.ai/inference/v1"
	CerebrasURL  = "https://api.cerebras.ai/v1"
	LlamaCPPURL  = "http://host.docker.internal:1234/v1" // local llama.cpp server as reachable from inside Docker
	TogetherURL  = "https://api.together.xyz/v1"
	GeminiURL    = "https://generativelanguage.googleapis.com/v1beta/openai/"
	MistralURL   = "https://api.mistral.ai/v1"
	MoonshotURL  = "https://api.moonshot.ai/v1"

	// Environment variable names for API keys
	OpenAIAPIKeyEnv    = "OPENAI_API_KEY"
	FireworksAPIKeyEnv = "FIREWORKS_API_KEY"
	CerebrasAPIKeyEnv  = "CEREBRAS_API_KEY"
	TogetherAPIKeyEnv  = "TOGETHER_API_KEY"
	GeminiAPIKeyEnv    = "GEMINI_API_KEY"
	MistralAPIKeyEnv   = "MISTRAL_API_KEY"
	MoonshotAPIKeyEnv  = "MOONSHOT_API_KEY"
)
40
// Model describes a chat model reachable through an OpenAI-compatible API,
// pairing the user-facing name with the provider-side identifier, endpoint,
// and per-model behavior flags.
type Model struct {
	UserName           string // provided by the user to identify this model (e.g. "gpt4.1")
	ModelName          string // provided to the service provider to specify which model to use (e.g. "gpt-4.1-2025-04-14")
	URL                string // base URL of the provider's API; may be empty when resolved elsewhere (e.g. skaband)
	APIKeyEnv          string // environment variable name for the API key
	IsReasoningModel   bool   // whether this model is a reasoning model (e.g. O3, O4-mini)
	UseSimplifiedPatch bool   // whether to use the simplified patch input schema; defaults to false
}
49
// Known model definitions, grouped by provider.
var (
	// DefaultModel is used whenever a Service has a zero-valued Model.
	DefaultModel = GPT41

	// OpenAI models.

	GPT41 = Model{
		UserName:  "gpt4.1",
		ModelName: "gpt-4.1-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4o = Model{
		UserName:  "gpt4o",
		ModelName: "gpt-4o-2024-08-06",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT4oMini = Model{
		UserName:  "gpt4o-mini",
		ModelName: "gpt-4o-mini-2024-07-18",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Mini = Model{
		UserName:  "gpt4.1-mini",
		ModelName: "gpt-4.1-mini-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT41Nano = Model{
		UserName:  "gpt4.1-nano",
		ModelName: "gpt-4.1-nano-2025-04-14",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	O3 = Model{
		UserName:         "o3",
		ModelName:        "o3-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	O4Mini = Model{
		UserName:         "o4-mini",
		ModelName:        "o4-mini-2025-04-16",
		URL:              OpenAIURL,
		APIKeyEnv:        OpenAIAPIKeyEnv,
		IsReasoningModel: true,
	}

	// Google Gemini models (via the OpenAI-compatibility endpoint).

	Gemini25Flash = Model{
		UserName:  "gemini-flash-2.5",
		ModelName: "gemini-2.5-flash-preview-04-17",
		URL:       GeminiURL,
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	Gemini25Pro = Model{
		UserName:  "gemini-pro-2.5",
		ModelName: "gemini-2.5-pro-preview-03-25",
		URL:       GeminiURL,
		// Gemini 2.5 Pro pricing is tiered by prompt size, which this
		// package deliberately does not model:
		//   input:  $1.25/M tokens (<=200k prompt), $2.50/M above
		//   output: $10.00/M tokens (<=200k prompt), $15.00/M above
		//   cache:  $0.31/M (<=200k), $0.625/M above, plus $4.50/M tokens/hour
		// NOTE(review): it is unclear from this code whether caching applies;
		// costs here are taken from response headers (see toLLMUsage).
		APIKeyEnv: GeminiAPIKeyEnv,
	}

	// Together models.

	TogetherDeepseekV3 = Model{
		UserName:  "together-deepseek-v3",
		ModelName: "deepseek-ai/DeepSeek-V3",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherDeepseekR1 = Model{
		UserName:  "together-deepseek-r1",
		ModelName: "deepseek-ai/DeepSeek-R1",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherLlama4Maverick = Model{
		UserName:  "together-llama4-maverick",
		ModelName: "meta-llama/Llama-4-Maverick-17B-128E-Instruct-FP8",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	FireworksLlama4Maverick = Model{
		UserName:  "fireworks-llama4-maverick",
		ModelName: "accounts/fireworks/models/llama4-maverick-instruct-basic",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	TogetherLlama3_3_70B = Model{
		UserName:  "together-llama3-70b",
		ModelName: "meta-llama/Llama-3.3-70B-Instruct-Turbo",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherMistralSmall = Model{
		UserName:  "together-mistral-small",
		ModelName: "mistralai/Mistral-Small-24B-Instruct-2501",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherQwen3 = Model{
		UserName:  "together-qwen3",
		ModelName: "Qwen/Qwen3-235B-A22B-fp8-tput",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	TogetherGemma2 = Model{
		UserName:  "together-gemma2",
		ModelName: "google/gemma-2-27b-it",
		URL:       TogetherURL,
		APIKeyEnv: TogetherAPIKeyEnv,
	}

	// Local llama.cpp server; no API key is required.
	LlamaCPP = Model{
		UserName:  "llama.cpp",
		ModelName: "llama.cpp local model",
		URL:       LlamaCPPURL,
		APIKeyEnv: "NONE",
	}

	// Fireworks models.

	FireworksDeepseekV3 = Model{
		UserName:  "fireworks-deepseek-v3",
		ModelName: "accounts/fireworks/models/deepseek-v3-0324",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	// NOTE(review): the user-facing name says Kimi K2 but the provider model
	// is "moonshot-v1-auto" (auto-routing) — confirm this mapping is intended.
	MoonshotKimiK2 = Model{
		UserName:  "moonshot-kimi-k2",
		ModelName: "moonshot-v1-auto",
		URL:       MoonshotURL,
		APIKeyEnv: MoonshotAPIKeyEnv,
	}

	// Mistral models.

	MistralMedium = Model{
		UserName:  "mistral-medium-3",
		ModelName: "mistral-medium-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	DevstralSmall = Model{
		UserName:  "devstral-small",
		ModelName: "devstral-small-latest",
		URL:       MistralURL,
		APIKeyEnv: MistralAPIKeyEnv,
	}

	Qwen3CoderFireworks = Model{
		UserName:           "qwen3-coder-fireworks",
		ModelName:          "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct",
		URL:                FireworksURL,
		APIKeyEnv:          FireworksAPIKeyEnv,
		UseSimplifiedPatch: true,
	}

	Qwen3CoderCerebras = Model{
		UserName:  "qwen3-coder-cerebras",
		ModelName: "qwen-3-coder-480b",
		URL:       CerebrasURL,
		APIKeyEnv: CerebrasAPIKeyEnv,
	}

	Qwen3Coder30Fireworks = Model{
		UserName:           "qwen3-coder-30-fireworks",
		ModelName:          "accounts/fireworks/models/qwen3-30b-a3b",
		URL:                FireworksURL,
		APIKeyEnv:          FireworksAPIKeyEnv,
		UseSimplifiedPatch: true,
	}

	ZaiGLM45CoderFireworks = Model{
		UserName:  "zai-glm45-fireworks",
		ModelName: "accounts/fireworks/models/glm-4p5",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	GLM4P6Fireworks = Model{
		UserName:  "glm-4p6-fireworks",
		ModelName: "accounts/fireworks/models/glm-4p6",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	GLM47Fireworks = Model{
		UserName:  "glm-4.7-fireworks",
		ModelName: "accounts/fireworks/models/glm-4p7",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	GPTOSS20B = Model{
		UserName:  "gpt-oss-20b",
		ModelName: "accounts/fireworks/models/gpt-oss-20b",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	GPTOSS120B = Model{
		UserName:  "gpt-oss-120b",
		ModelName: "accounts/fireworks/models/gpt-oss-120b",
		URL:       FireworksURL,
		APIKeyEnv: FireworksAPIKeyEnv,
	}

	// GPT-5.x models.
	// NOTE(review): these are not flagged IsReasoningModel — confirm whether
	// that flag should apply to the "thinking" variants.

	GPT5 = Model{
		UserName:  "gpt-5-thinking",
		ModelName: "gpt-5.1",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT5Mini = Model{
		UserName:  "gpt-5-thinking-mini",
		ModelName: "gpt-5.1-mini",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT5Nano = Model{
		UserName:  "gpt-5-thinking-nano",
		ModelName: "gpt-5.1-nano",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT5Codex = Model{
		UserName:  "gpt-5.1-codex",
		ModelName: "gpt-5.1-codex",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT52Codex = Model{
		UserName:  "gpt-5.2-codex",
		ModelName: "gpt-5.2-codex",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	GPT53Codex = Model{
		UserName:  "gpt-5.3-codex",
		ModelName: "gpt-5.3-codex",
		URL:       OpenAIURL,
		APIKeyEnv: OpenAIAPIKeyEnv,
	}

	// Skaband-specific model names.
	// Provider details (URL and APIKeyEnv) are handled by skaband
	Qwen = Model{
		UserName:           "qwen",
		ModelName:          "qwen", // skaband will map this to the actual provider model
		UseSimplifiedPatch: true,
	}
	GLM = Model{
		UserName:  "glm",
		ModelName: "glm", // skaband will map this to the actual provider model
	}
)
328
// Service provides chat completions over any OpenAI-compatible API.
// Fields should not be altered concurrently with calling any method on Service.
// The zero value is usable: it targets DefaultModel with default client,
// token limit, and no organization.
type Service struct {
	HTTPC     *http.Client // defaults to http.DefaultClient if nil
	APIKey    string       // optional, if not set will try to load from env var
	Model     Model        // defaults to DefaultModel if zero value
	ModelURL  string       // optional, overrides Model.URL
	MaxTokens int          // defaults to DefaultMaxTokens if zero
	Org       string       // optional - organization ID
}
339
// Compile-time check that *Service implements llm.Service.
var _ llm.Service = (*Service)(nil)

// ModelsRegistry is a registry of all known models with their user-friendly names.
// ListModels and ModelByUserName iterate this slice, so ordering here is the
// ordering users see.
var ModelsRegistry = []Model{
	GPT41,
	GPT41Mini,
	GPT41Nano,
	GPT4o,
	GPT4oMini,
	GPT5,
	GPT5Mini,
	GPT5Nano,
	GPT5Codex,
	GPT52Codex,
	GPT53Codex,
	O3,
	O4Mini,
	Gemini25Flash,
	Gemini25Pro,
	TogetherDeepseekV3,
	TogetherDeepseekR1,
	TogetherLlama4Maverick,
	TogetherLlama3_3_70B,
	TogetherMistralSmall,
	TogetherQwen3,
	TogetherGemma2,
	LlamaCPP,
	FireworksDeepseekV3,
	MoonshotKimiK2,
	FireworksLlama4Maverick,
	MistralMedium,
	DevstralSmall,
	Qwen3CoderFireworks,
	Qwen3Coder30Fireworks,
	Qwen3CoderCerebras,
	ZaiGLM45CoderFireworks,
	GLM4P6Fireworks,
	GLM47Fireworks,
	GPTOSS120B,
	GPTOSS20B,
	// Skaband-supported models
	Qwen,
	GLM,
}
384
385// ListModels returns a list of all available models with their user-friendly names.
386func ListModels() []string {
387 var names []string
388 for _, model := range ModelsRegistry {
389 if model.UserName != "" {
390 names = append(names, model.UserName)
391 }
392 }
393 return names
394}
395
396// ModelByUserName returns a model by its user-friendly name.
397// Returns nil if no model with the given name is found.
398func ModelByUserName(name string) Model {
399 for _, model := range ModelsRegistry {
400 if model.UserName == name {
401 return model
402 }
403 }
404 return Model{}
405}
406
407func (m Model) IsZero() bool {
408 return m == Model{}
409}
410
// Lookup tables translating between llm-package enums and the wire strings
// used by OpenAI-compatible APIs.
var (
	// fromLLMRole maps llm message roles to OpenAI role strings.
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	// fromLLMToolChoiceType maps llm tool-choice types to OpenAI tool_choice strings.
	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "function", // OpenAI uses "function" instead of "tool"
	}
	// toLLMRole is the inverse of fromLLMRole, for roles OpenAI can return.
	toLLMRole = map[string]llm.MessageRole{
		"assistant": llm.MessageRoleAssistant,
		"user":      llm.MessageRoleUser,
	}
	// toLLMStopReason maps OpenAI finish_reason values to llm stop reasons.
	toLLMStopReason = map[string]llm.StopReason{
		"stop":           llm.StopReasonStopSequence,
		"length":         llm.StopReasonMaxTokens,
		"tool_calls":     llm.StopReasonToolUse,
		"function_call":  llm.StopReasonToolUse,      // Map both to ToolUse
		"content_filter": llm.StopReasonStopSequence, // No direct equivalent
	}
)
434
435// fromLLMContent converts llm.Content to the format expected by OpenAI.
436func fromLLMContent(c llm.Content) (string, []openai.ToolCall) {
437 switch c.Type {
438 case llm.ContentTypeText:
439 return c.Text, nil
440 case llm.ContentTypeToolUse:
441 // For OpenAI, tool use is sent as a null content with tool_calls in the message
442 return "", []openai.ToolCall{
443 {
444 Type: openai.ToolTypeFunction,
445 ID: c.ID, // Use the content ID if provided
446 Function: openai.FunctionCall{
447 Name: c.ToolName,
448 Arguments: string(c.ToolInput),
449 },
450 },
451 }
452 case llm.ContentTypeToolResult:
453 // Tool results in OpenAI are sent as a separate message with tool_call_id
454 // OpenAI doesn't support multiple content items or images in tool results
455 // Combine all text content into a single string
456 var resultText string
457 if len(c.ToolResult) > 0 {
458 // Collect all text from content objects
459 texts := make([]string, 0, len(c.ToolResult))
460 for _, result := range c.ToolResult {
461 if result.Text != "" {
462 texts = append(texts, result.Text)
463 }
464 }
465 resultText = strings.Join(texts, "\n")
466 }
467 return resultText, nil
468 default:
469 // For thinking or other types, convert to text
470 return c.Text, nil
471 }
472}
473
// fromLLMMessage converts llm.Message to OpenAI ChatCompletionMessage format.
//
// OpenAI represents each tool result as its own message with role="tool",
// so a single llm.Message may expand into several OpenAI messages: one per
// tool result, followed by at most one message carrying the remaining
// text/tool-call content.
func fromLLMMessage(msg llm.Message) []openai.ChatCompletionMessage {
	// For OpenAI, we need to handle tool results differently than regular messages
	// Each tool result becomes its own message with role="tool"

	var messages []openai.ChatCompletionMessage

	// Partition content into tool results and everything else.
	var regularContent []llm.Content
	var toolResults []llm.Content

	for _, c := range msg.Content {
		if c.Type == llm.ContentTypeToolResult {
			toolResults = append(toolResults, c)
		} else {
			regularContent = append(regularContent, c)
		}
	}

	// Emit tool results first, each as a standalone role="tool" message.
	for _, tr := range toolResults {
		// Flatten the tool-result content list into a single string,
		// keeping only non-blank text items.
		var texts []string
		for _, result := range tr.ToolResult {
			if strings.TrimSpace(result.Text) != "" {
				texts = append(texts, result.Text)
			}
		}
		toolResultContent := strings.Join(texts, "\n")

		// OpenAI doesn't have an explicit error field for tool results, so add it directly to the content.
		if tr.ToolError {
			if toolResultContent != "" {
				toolResultContent = "error: " + toolResultContent
			} else {
				toolResultContent = "error: tool execution failed"
			}
		}

		m := openai.ChatCompletionMessage{
			Role:       "tool",
			Content:    cmp.Or(toolResultContent, " "), // Use empty space if empty to avoid omitempty issues
			ToolCallID: tr.ToolUseID,
		}
		messages = append(messages, m)
	}
	// Then emit the remaining content as a single message, if any.
	if len(regularContent) > 0 {
		m := openai.ChatCompletionMessage{
			Role: fromLLMRole[msg.Role],
		}

		// For assistant messages that contain tool calls
		var toolCalls []openai.ToolCall
		var textContent string

		for _, c := range regularContent {
			content, tools := fromLLMContent(c)
			if len(tools) > 0 {
				toolCalls = append(toolCalls, tools...)
			} else if content != "" {
				// Join multiple text items with newlines.
				if textContent != "" {
					textContent += "\n"
				}
				textContent += content
			}
		}

		m.Content = textContent
		m.ToolCalls = toolCalls

		messages = append(messages, m)
	}

	return messages
}
551
552// fromLLMToolChoice converts llm.ToolChoice to the format expected by OpenAI.
553func fromLLMToolChoice(tc *llm.ToolChoice) any {
554 if tc == nil {
555 return nil
556 }
557
558 if tc.Type == llm.ToolChoiceTypeTool && tc.Name != "" {
559 return openai.ToolChoice{
560 Type: openai.ToolTypeFunction,
561 Function: openai.ToolFunction{
562 Name: tc.Name,
563 },
564 }
565 }
566
567 // For non-specific tool choice, just use the string
568 return fromLLMToolChoiceType[tc.Type]
569}
570
571// fromLLMTool converts llm.Tool to the format expected by OpenAI.
572func fromLLMTool(t *llm.Tool) openai.Tool {
573 return openai.Tool{
574 Type: openai.ToolTypeFunction,
575 Function: &openai.FunctionDefinition{
576 Name: t.Name,
577 Description: t.Description,
578 Parameters: t.InputSchema,
579 },
580 }
581}
582
583// fromLLMSystem converts llm.SystemContent to an OpenAI system message.
584func fromLLMSystem(systemContent []llm.SystemContent) []openai.ChatCompletionMessage {
585 if len(systemContent) == 0 {
586 return nil
587 }
588
589 // Combine all system content into a single system message
590 var systemText string
591 for i, content := range systemContent {
592 if i > 0 && systemText != "" && content.Text != "" {
593 systemText += "\n"
594 }
595 systemText += content.Text
596 }
597
598 if systemText == "" {
599 return nil
600 }
601
602 return []openai.ChatCompletionMessage{
603 {
604 Role: "system",
605 Content: systemText,
606 },
607 }
608}
609
610// toRawLLMContent converts a raw content string from OpenAI to llm.Content.
611func toRawLLMContent(content string) llm.Content {
612 return llm.Content{
613 Type: llm.ContentTypeText,
614 Text: content,
615 }
616}
617
618// toToolCallLLMContent converts a tool call from OpenAI to llm.Content.
619func toToolCallLLMContent(toolCall openai.ToolCall) llm.Content {
620 // Generate a content ID if needed
621 id := toolCall.ID
622 if id == "" {
623 // Create a deterministic ID based on the function name if no ID is provided
624 id = "tc_" + toolCall.Function.Name
625 }
626
627 return llm.Content{
628 ID: id,
629 Type: llm.ContentTypeToolUse,
630 ToolName: toolCall.Function.Name,
631 ToolInput: json.RawMessage(toolCall.Function.Arguments),
632 }
633}
634
635// toToolResultLLMContent converts a tool result message from OpenAI to llm.Content.
636func toToolResultLLMContent(msg openai.ChatCompletionMessage) llm.Content {
637 return llm.Content{
638 Type: llm.ContentTypeToolResult,
639 ToolUseID: msg.ToolCallID,
640 ToolResult: []llm.Content{{
641 Type: llm.ContentTypeText,
642 Text: msg.Content,
643 }},
644 ToolError: false, // OpenAI doesn't specify errors explicitly; error information is parsed from content
645 }
646}
647
648// toLLMContents converts message content from OpenAI to []llm.Content.
649func toLLMContents(msg openai.ChatCompletionMessage) []llm.Content {
650 var contents []llm.Content
651
652 // If this is a tool response, handle it separately
653 if msg.Role == "tool" && msg.ToolCallID != "" {
654 return []llm.Content{toToolResultLLMContent(msg)}
655 }
656
657 // If there's text content, add it
658 if msg.Content != "" {
659 contents = append(contents, toRawLLMContent(msg.Content))
660 }
661
662 // If there are tool calls, add them
663 for _, tc := range msg.ToolCalls {
664 contents = append(contents, toToolCallLLMContent(tc))
665 }
666
667 // If empty, add an empty text content
668 if len(contents) == 0 {
669 contents = append(contents, llm.Content{
670 Type: llm.ContentTypeText,
671 Text: "",
672 })
673 }
674
675 return contents
676}
677
// toLLMUsage converts usage information from OpenAI to llm.Usage.
// Dollar cost, when the provider reports it, is taken from response headers.
func (s *Service) toLLMUsage(au openai.Usage, headers http.Header) llm.Usage {
	// fmt.Printf("raw usage: %+v / %v / %v\n", au, au.PromptTokensDetails, au.CompletionTokensDetails)
	in := uint64(au.PromptTokens)
	var inc uint64
	if au.PromptTokensDetails != nil {
		// Tokens served from the provider's prompt cache, when reported.
		inc = uint64(au.PromptTokensDetails.CachedTokens)
	}
	out := uint64(au.CompletionTokens)
	u := llm.Usage{
		InputTokens:          in,
		CacheReadInputTokens: inc,
		// NOTE(review): CacheCreationInputTokens is set to the full prompt
		// token count. OpenAI's usage payload carries no cache-write figure,
		// so this looks like either an upper-bound placeholder or a
		// copy/paste of InputTokens — confirm which is intended.
		CacheCreationInputTokens: in,
		OutputTokens:             out,
	}
	u.CostUSD = llm.CostUSDFromResponse(headers)
	return u
}
696
697// toLLMResponse converts the OpenAI response to llm.Response.
698func (s *Service) toLLMResponse(r *openai.ChatCompletionResponse) *llm.Response {
699 // fmt.Printf("Raw response\n")
700 // enc := json.NewEncoder(os.Stdout)
701 // enc.SetIndent("", " ")
702 // enc.Encode(r)
703 // fmt.Printf("\n")
704
705 if len(r.Choices) == 0 {
706 return &llm.Response{
707 ID: r.ID,
708 Model: r.Model,
709 Role: llm.MessageRoleAssistant,
710 Usage: s.toLLMUsage(r.Usage, r.Header()),
711 }
712 }
713
714 // Process the primary choice
715 choice := r.Choices[0]
716
717 return &llm.Response{
718 ID: r.ID,
719 Model: r.Model,
720 Role: toRoleFromString(choice.Message.Role),
721 Content: toLLMContents(choice.Message),
722 StopReason: toStopReason(string(choice.FinishReason)),
723 Usage: s.toLLMUsage(r.Usage, r.Header()),
724 }
725}
726
727// toRoleFromString converts a role string to llm.MessageRole.
728func toRoleFromString(role string) llm.MessageRole {
729 if role == "tool" || role == "system" || role == "function" {
730 return llm.MessageRoleAssistant // Map special roles to assistant for consistency
731 }
732 if mr, ok := toLLMRole[role]; ok {
733 return mr
734 }
735 return llm.MessageRoleUser // Default to user if unknown
736}
737
738// toStopReason converts a finish reason string to llm.StopReason.
739func toStopReason(reason string) llm.StopReason {
740 if sr, ok := toLLMStopReason[reason]; ok {
741 return sr
742 }
743 return llm.StopReasonStopSequence // Default
744}
745
746// TokenContextWindow returns the maximum token context window size for this service
747func (s *Service) TokenContextWindow() int {
748 // TODO: move TokenContextWindow information to Model struct
749
750 model := cmp.Or(s.Model, DefaultModel)
751
752 // OpenAI models generally have 128k context windows
753 // Some newer models have larger windows, but 128k is a safe default
754 switch model.ModelName {
755 case "gpt-4.1-2025-04-14", "gpt-4.1-mini-2025-04-14", "gpt-4.1-nano-2025-04-14":
756 return 200000 // 200k for newer GPT-4.1 models
757 case "gpt-4o-2024-08-06", "gpt-4o-mini-2024-07-18":
758 return 128000 // 128k for GPT-4o models
759 case "o3-2025-04-16", "o3-mini-2025-04-16":
760 return 200000 // 200k for O3 models
761 case "accounts/fireworks/models/qwen3-coder-480b-a35b-instruct":
762 return 256000 // 256k native context for Qwen3-Coder
763 case "glm", "zai-glm45-fireworks":
764 return 128000
765 case "qwen", "qwen3-coder-cerebras", "qwen3-coder-fireworks":
766 return 256000 // 256k native context for Qwen3-Coder
767 case "gpt-oss-20b", "gpt-oss-120b":
768 return 128000
769 case "gpt-5.1", "gpt-5.1-mini", "gpt-5.1-nano":
770 return 256000
771 default:
772 // Default for unknown models
773 return 128000
774 }
775}
776
777// MaxImageDimension returns the maximum allowed image dimension.
778// TODO: determine actual OpenAI image dimension limits
779func (s *Service) MaxImageDimension() int {
780 return 0 // No known limit
781}
782
783// Do sends a request to OpenAI using the go-openai package.
784func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
785 // Configure the OpenAI client
786 httpc := cmp.Or(s.HTTPC, http.DefaultClient)
787 model := cmp.Or(s.Model, DefaultModel)
788
789 // TODO: do this one during Service setup? maybe with a constructor instead?
790 config := openai.DefaultConfig(s.APIKey)
791 baseURL := cmp.Or(s.ModelURL, model.URL)
792 if baseURL != "" {
793 config.BaseURL = baseURL
794 }
795 if s.Org != "" {
796 config.OrgID = s.Org
797 }
798 config.HTTPClient = httpc
799
800 client := openai.NewClientWithConfig(config)
801
802 // Start with system messages if provided
803 var allMessages []openai.ChatCompletionMessage
804 if len(ir.System) > 0 {
805 sysMessages := fromLLMSystem(ir.System)
806 allMessages = append(allMessages, sysMessages...)
807 }
808
809 // Add regular and tool messages
810 for _, msg := range ir.Messages {
811 msgs := fromLLMMessage(msg)
812 allMessages = append(allMessages, msgs...)
813 }
814
815 // Convert tools
816 var tools []openai.Tool
817 for _, t := range ir.Tools {
818 tools = append(tools, fromLLMTool(t))
819 }
820
821 // Create the OpenAI request
822 req := openai.ChatCompletionRequest{
823 Model: model.ModelName,
824 Messages: allMessages,
825 Tools: tools,
826 ToolChoice: fromLLMToolChoice(ir.ToolChoice), // TODO: make fromLLMToolChoice return an error when a perfect translation is not possible
827 MaxCompletionTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
828 }
829 // Construct the full URL for logging and debugging
830 fullURL := baseURL + "/chat/completions"
831
832 // Retry mechanism
833 backoff := []time.Duration{1 * time.Second, 2 * time.Second, 5 * time.Second, 10 * time.Second, 15 * time.Second}
834
835 // retry loop
836 var errs error // accumulated errors across all attempts
837 for attempts := 0; ; attempts++ {
838 if attempts > 10 {
839 return nil, fmt.Errorf("openai request failed after %d attempts (url=%s, model=%s): %w", attempts, fullURL, model.ModelName, errs)
840 }
841 if attempts > 0 {
842 sleep := backoff[min(attempts, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
843 slog.WarnContext(ctx, "openai request sleep before retry", "sleep", sleep, "attempts", attempts)
844 time.Sleep(sleep)
845 }
846
847 resp, err := client.CreateChatCompletion(ctx, req)
848
849 // Handle successful response
850 if err == nil {
851 return s.toLLMResponse(&resp), nil
852 }
853
854 // Handle errors
855 // Check for TLS "bad record MAC" errors and retry once
856 if strings.Contains(err.Error(), "tls: bad record MAC") && attempts == 0 {
857 slog.WarnContext(ctx, "tls bad record MAC error, retrying once", "error", err.Error())
858 errs = errors.Join(errs, fmt.Errorf("TLS error (attempt %d): %w", attempts+1, err))
859 continue
860 }
861
862 var apiErr *openai.APIError
863 if ok := errors.As(err, &apiErr); !ok {
864 // Not an OpenAI API error, return immediately with accumulated errors
865 return nil, errors.Join(errs, fmt.Errorf("url=%s model=%s: %w", fullURL, model.ModelName, err))
866 }
867
868 switch {
869 case apiErr.HTTPStatusCode >= 500:
870 // Server error, try again with backoff
871 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "url", fullURL, "model", model.ModelName)
872 errs = errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", apiErr.HTTPStatusCode, fullURL, model.ModelName, apiErr.Error()))
873 continue
874
875 case apiErr.HTTPStatusCode == 429:
876 // Rate limited, accumulate error and retry
877 slog.WarnContext(ctx, "openai_request_rate_limited", "error", apiErr.Error(), "url", fullURL, "model", model.ModelName)
878 errs = errors.Join(errs, fmt.Errorf("status %d (rate limited, url=%s, model=%s): %s", apiErr.HTTPStatusCode, fullURL, model.ModelName, apiErr.Error()))
879 continue
880
881 case apiErr.HTTPStatusCode >= 400 && apiErr.HTTPStatusCode < 500:
882 // Client error, probably unrecoverable
883 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "url", fullURL, "model", model.ModelName)
884 return nil, errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", apiErr.HTTPStatusCode, fullURL, model.ModelName, apiErr.Error()))
885
886 default:
887 // Other error, accumulate and retry
888 slog.WarnContext(ctx, "openai_request_failed", "error", apiErr.Error(), "status_code", apiErr.HTTPStatusCode, "url", fullURL, "model", model.ModelName)
889 errs = errors.Join(errs, fmt.Errorf("status %d (url=%s, model=%s): %s", apiErr.HTTPStatusCode, fullURL, model.ModelName, apiErr.Error()))
890 continue
891 }
892 }
893}
894
895func (s *Service) UseSimplifiedPatch() bool {
896 return s.Model.UseSimplifiedPatch
897}
898
899// ConfigDetails returns configuration information for logging
900func (s *Service) ConfigDetails() map[string]string {
901 model := cmp.Or(s.Model, DefaultModel)
902 baseURL := cmp.Or(s.ModelURL, model.URL, OpenAIURL)
903 return map[string]string{
904 "base_url": baseURL,
905 "model_name": model.ModelName,
906 "full_url": baseURL + "/chat/completions",
907 "api_key_env": model.APIKeyEnv,
908 "has_api_key_set": fmt.Sprintf("%v", s.APIKey != ""),
909 }
910}