package ant

import (
	"bytes"
	"cmp"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math/rand/v2"
	"net/http"
	"strings"
	"time"

	"shelley.exe.dev/llm"
)

const (
	DefaultModel = Claude45Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19)
	DefaultMaxTokens = 8192
	APIKeyEnv        = "ANTHROPIC_API_KEY"
	DefaultURL       = "https://api.anthropic.com/v1/messages"
)

const (
	Claude45Haiku  = "claude-haiku-4-5-20251001"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
	Claude4Sonnet  = "claude-sonnet-4-20250514"
	Claude45Sonnet = "claude-sonnet-4-5-20250929"
	Claude45Opus   = "claude-opus-4-5-20251101"
	Claude46Opus   = "claude-opus-4-6"
)

// IsClaudeModel reports whether userName is a user-friendly name for a Claude model.
// It uses ClaudeModelName under the hood.
func IsClaudeModel(userName string) bool {
	return ClaudeModelName(userName) != ""
}

// ClaudeModelName returns the Anthropic Claude model name for userName.
// It returns an empty string if userName is not a recognized Claude model.
func ClaudeModelName(userName string) string {
	switch userName {
	case "claude", "sonnet":
		return Claude45Sonnet
	case "opus":
		return Claude45Opus
	default:
		return ""
	}
}
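
// For example (illustrative calls):
//
//	ClaudeModelName("sonnet") // == Claude45Sonnet
//	ClaudeModelName("claude") // == Claude45Sonnet
//	IsClaudeModel("gpt-4")    // == false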

// TokenContextWindow returns the maximum token context window size for this service.
func (s *Service) TokenContextWindow() int {
	model := s.Model
	if model == "" {
		model = DefaultModel
	}

	switch model {
	case Claude37Sonnet, Claude4Sonnet, Claude45Sonnet:
		return 200000
	case Claude45Haiku:
		return 200000
	case Claude45Opus, Claude46Opus:
		return 200000
	default:
		// Default for unknown models
		return 200000
	}
}

// MaxImageDimension returns the maximum allowed image dimension for multi-image requests.
// Anthropic enforces a 2000 pixel limit when multiple images are in a conversation.
func (s *Service) MaxImageDimension() int {
	return 2000
}

// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC         *http.Client      // defaults to http.DefaultClient if nil
	URL           string            // defaults to DefaultURL if empty
	APIKey        string            // must be non-empty
	Model         string            // defaults to DefaultModel if empty
	MaxTokens     int               // defaults to DefaultMaxTokens if zero
	ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables, default is ThinkingLevelMedium)
}

var _ llm.Service = (*Service)(nil)
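
// A minimal usage sketch from a client package (illustrative; the llm.Request
// here is deliberately skeletal and its exact construction depends on the llm
// package's types):
//
//	svc := &ant.Service{
//		APIKey: os.Getenv(ant.APIKeyEnv),
//		Model:  ant.Claude45Sonnet,
//	}
//	resp, err := svc.Do(ctx, &llm.Request{
//		Messages: []llm.Message{ /* user/assistant turns */ },
//	})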

type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, an empty string often appears in tool results, so we have to
	// distinguish between an empty string and no string at all. Otherwise the
	// API rejects the request with errors like:
	//   "messages.46.content.0.tool_result.content.0.text.text: Field required"
	//   "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// There is no great reference for the API shape, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is serviceable, if hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// note the recursive nature here; a message looks like:
	// {
	//   "role": "user",
	//   "content": [
	//     {
	//       "type": "tool_result",
	//       "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//       "content": [
	//         {"type": "text", "text": "15 degrees"},
	//         {
	//           "type": "image",
	//           "source": {
	//             "type": "base64",
	//             "media_type": "image/jpeg",
	//             "data": "/9j/4AAQSkZJRg..."
	//           }
	//         }
	//       ]
	//     }
	//   ]
	// }
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}

// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"`
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // used to control whether/which tool to use
}

// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}

// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type         string          `json:"type,omitempty"`
	Description  string          `json:"description,omitempty"`
	InputSchema  json.RawMessage `json:"input_schema,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}

// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

func (u *usage) Add(other usage) {
	u.InputTokens += other.InputTokens
	u.CacheCreationInputTokens += other.CacheCreationInputTokens
	u.CacheReadInputTokens += other.CacheReadInputTokens
	u.OutputTokens += other.OutputTokens
	u.CostUSD += other.CostUSD
}

// response represents the response from the messages API.
type response struct {
	ID           string    `json:"id"`
	Type         string    `json:"type"`
	Role         string    `json:"role"`
	Model        string    `json:"model"`
	Content      []content `json:"content"`
	StopReason   string    `json:"stop_reason"`
	StopSequence *string   `json:"stop_sequence,omitempty"`
	Usage        usage     `json:"usage"`
}

type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}

// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}

// thinking configures extended thinking for Claude models.
type thinking struct {
	Type         string `json:"type"`                    // "enabled"
	BudgetTokens int    `json:"budget_tokens,omitempty"` // max tokens for thinking
}

// request represents the request payload for creating a message.
type request struct {
	// Field order matters for JSON serialization - stable fields should come first
	// to maximize prefix deduplication when storing LLM requests.
	Model         string          `json:"model"`
	MaxTokens     int             `json:"max_tokens"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Tools         []*tool         `json:"tools,omitempty"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	Thinking      *thinking       `json:"thinking,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`
	// Messages comes last since it grows with each request in a conversation.
	Messages []message `json:"messages"`
}
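
// A minimal serialized request body looks roughly like this (illustrative;
// actual values depend on the Service configuration and the conversation):
//
//	{
//	  "model": "claude-sonnet-4-5-20250929",
//	  "max_tokens": 8192,
//	  "messages": [
//	    {"role": "user", "content": [{"type": "text", "text": "hello"}]}
//	  ]
//	}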

func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	out := make([]T, len(s))
	for i, v := range s {
		out[i] = f(v)
	}
	return out
}

func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	inv := make(map[V]K)
	for k, v := range m {
		if _, ok := inv[v]; ok {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", v))
		}
		inv[v] = k
	}
	return inv
}

var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
		"refusal":       llm.StopReasonRefusal,
	}
)

func fromLLMCache(c bool) json.RawMessage {
	if !c {
		return nil
	}
	return json.RawMessage(`{"type":"ephemeral"}`)
}

func fromLLMContent(c llm.Content) content {
	var toolResult []content
	if len(c.ToolResult) > 0 {
		toolResult = make([]content, len(c.ToolResult))
		for i, tr := range c.ToolResult {
			// For image content inside a tool_result, we need to map it to the "image" type.
			if tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
				// Format as an image for Claude.
				toolResult[i] = content{
					Type: "image",
					Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
						tr.MediaType, tr.Data)),
				}
			} else {
				toolResult[i] = fromLLMContent(tr)
			}
		}
	}

	d := content{
		Type:         fromLLMContentType[c.Type],
		CacheControl: fromLLMCache(c.Cache),
	}

	// Set fields based on content type to avoid sending invalid fields.
	switch c.Type {
	case llm.ContentTypeText:
		// Images are represented as text with MediaType and Data.
		if c.MediaType != "" {
			d.Type = "image"
			d.Source = json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
				c.MediaType, c.Data))
		} else {
			d.Text = &c.Text
		}
	case llm.ContentTypeThinking:
		d.Thinking = c.Thinking
		d.Signature = c.Signature
	case llm.ContentTypeRedactedThinking:
		d.Data = c.Data
		d.Signature = c.Signature
	case llm.ContentTypeToolUse:
		d.ID = c.ID
		d.ToolName = c.ToolName
		d.ToolInput = c.ToolInput
	case llm.ContentTypeToolResult:
		d.ToolUseID = c.ToolUseID
		d.ToolError = c.ToolError
		d.ToolResult = toolResult
	}

	return d
}

func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
	if tu == nil {
		return nil
	}
	return &toolUse{
		ID:   tu.ID,
		Name: tu.Name,
	}
}

func fromLLMMessage(msg llm.Message) message {
	return message{
		Role:    fromLLMRole[msg.Role],
		Content: mapped(msg.Content, fromLLMContent),
		ToolUse: fromLLMToolUse(msg.ToolUse),
	}
}

func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
	if tc == nil {
		return nil
	}
	return &toolChoice{
		Type: fromLLMToolChoiceType[tc.Type],
		Name: tc.Name,
	}
}

func fromLLMTool(t *llm.Tool) *tool {
	return &tool{
		Name:         t.Name,
		Type:         t.Type,
		Description:  t.Description,
		InputSchema:  t.InputSchema,
		CacheControl: fromLLMCache(t.Cache),
	}
}

func fromLLMSystem(s llm.SystemContent) systemContent {
	return systemContent{
		Text:         s.Text,
		Type:         s.Type,
		CacheControl: fromLLMCache(s.Cache),
	}
}

func (s *Service) fromLLMRequest(r *llm.Request) *request {
	maxTokens := cmp.Or(s.MaxTokens, DefaultMaxTokens)

	req := &request{
		Model:      cmp.Or(s.Model, DefaultModel),
		Messages:   mapped(r.Messages, fromLLMMessage),
		MaxTokens:  maxTokens,
		ToolChoice: fromLLMToolChoice(r.ToolChoice),
		Tools:      mapped(r.Tools, fromLLMTool),
		System:     mapped(r.System, fromLLMSystem),
	}

	// Enable extended thinking if a thinking level is set.
	if s.ThinkingLevel != llm.ThinkingLevelOff {
		budget := s.ThinkingLevel.ThinkingBudgetTokens()
		// Ensure max_tokens > budget_tokens, as required by the Anthropic API.
		if maxTokens <= budget {
			req.MaxTokens = budget + 1024
		}
		req.Thinking = &thinking{Type: "enabled", BudgetTokens: budget}
	}
	return req
}
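
// For example (illustrative numbers only; the actual budget comes from
// llm.ThinkingLevel.ThinkingBudgetTokens): with a 10,000-token thinking budget
// and the 8,192-token DefaultMaxTokens, fromLLMRequest raises max_tokens to
// 11,024 so the visible reply still has headroom beyond the thinking budget.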

func toLLMUsage(u usage) llm.Usage {
	return llm.Usage{
		InputTokens:              u.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens,
		CostUSD:                  u.CostUSD,
	}
}

func toLLMContent(c content) llm.Content {
	// Convert toolResult from []content to []llm.Content.
	var toolResultContents []llm.Content
	if len(c.ToolResult) > 0 {
		toolResultContents = make([]llm.Content, len(c.ToolResult))
		for i, tr := range c.ToolResult {
			toolResultContents[i] = toLLMContent(tr)
		}
	}

	ret := llm.Content{
		ID:         c.ID,
		Type:       toLLMContentType[c.Type],
		MediaType:  c.MediaType,
		Thinking:   c.Thinking,
		Data:       c.Data,
		Signature:  c.Signature,
		ToolName:   c.ToolName,
		ToolInput:  c.ToolInput,
		ToolUseID:  c.ToolUseID,
		ToolError:  c.ToolError,
		ToolResult: toolResultContents,
	}
	if c.Text != nil {
		ret.Text = *c.Text
	}
	return ret
}

func toLLMResponse(r *response) *llm.Response {
	return &llm.Response{
		ID:           r.ID,
		Type:         r.Type,
		Role:         toLLMRole[r.Role],
		Model:        r.Model,
		Content:      mapped(r.Content, toLLMContent),
		StopReason:   toLLMStopReason[r.StopReason],
		StopSequence: r.StopSequence,
		Usage:        toLLMUsage(r.Usage),
	}
}

// Do sends a request to Anthropic.
func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
	startTime := time.Now()
	request := s.fromLLMRequest(ir)
	payload, err := json.Marshal(request)
	if err != nil {
		return nil, err
	}
	payload = append(payload, '\n')

	backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
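	// Retries sleep according to this backoff schedule (the final value repeats
	// once the schedule is exhausted), plus up to one second of random jitter;
	// the loop below gives up after 10 retries.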

	url := cmp.Or(s.URL, DefaultURL)
	httpc := cmp.Or(s.HTTPC, http.DefaultClient)

	// retry loop
	var errs error // accumulated errors across all attempts
	for attempts := 0; ; attempts++ {
		if attempts > 10 {
			return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs)
		}
		if attempts > 0 {
			sleep := backoff[min(attempts-1, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
			slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
			time.Sleep(sleep)
		}
		req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
		if err != nil {
			return nil, errors.Join(errs, err)
		}

		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("X-API-Key", s.APIKey)
		req.Header.Set("Anthropic-Version", "2023-06-01")

		resp, err := httpc.Do(req)
		if err != nil {
			// Don't retry httprr cache misses.
			if strings.Contains(err.Error(), "cached HTTP response not found") {
				return nil, err
			}
			errs = errors.Join(errs, err)
			continue
		}
		buf, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			errs = errors.Join(errs, err)
			continue
		}

		switch {
		case resp.StatusCode == http.StatusOK:
			var response response
			err = json.Unmarshal(buf, &response)
			if err != nil {
				return nil, errors.Join(errs, err)
			}
			// Calculate and set the cost_usd field.
			response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header)

			endTime := time.Now()
			result := toLLMResponse(&response)
			result.StartTime = &startTime
			result.EndTime = &endTime
			return result, nil
		case resp.StatusCode >= 500 && resp.StatusCode < 600:
			// server error, retry
			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
			continue
		case resp.StatusCode == 429:
			// rate limited, retry
			slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "url", url, "model", s.Model)
			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
			continue
		case resp.StatusCode >= 400 && resp.StatusCode < 500:
			// some other 4xx, probably unrecoverable
			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
			return nil, errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
		default:
			// ...retry, I guess?
			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
			continue
		}
	}
}

// For debugging only, Claude can definitely handle the full patch tool.
// func (s *Service) UseSimplifiedPatch() bool {
// 	return true
// }

// ConfigDetails returns configuration information for logging.
func (s *Service) ConfigDetails() map[string]string {
	model := cmp.Or(s.Model, DefaultModel)
	url := cmp.Or(s.URL, DefaultURL)
	return map[string]string{
		"url":             url,
		"model":           model,
		"has_api_key_set": fmt.Sprintf("%v", s.APIKey != ""),
	}
}