ant.go

package ant

import (
	"bytes"
	"cmp"
	"context"
	"encoding/json"
	"errors"
	"fmt"
	"io"
	"log/slog"
	"math/rand/v2"
	"net/http"
	"strings"
	"time"

	"shelley.exe.dev/llm"
)

const (
	DefaultModel = Claude45Sonnet
	// See https://docs.anthropic.com/en/docs/about-claude/models/all-models for
	// current maximums. There's currently a flag to enable 128k output (output-128k-2025-02-19).
	DefaultMaxTokens = 8192
	APIKeyEnv        = "ANTHROPIC_API_KEY"
	DefaultURL       = "https://api.anthropic.com/v1/messages"
)

const (
	Claude45Haiku  = "claude-haiku-4-5-20251001"
	Claude37Sonnet = "claude-3-7-sonnet-20250219"
	Claude4Sonnet  = "claude-sonnet-4-20250514"
	Claude45Sonnet = "claude-sonnet-4-5-20250929"
	Claude45Opus   = "claude-opus-4-5-20251101"
	Claude46Opus   = "claude-opus-4-6"
)

// IsClaudeModel reports whether userName is a user-friendly Claude model.
// It uses ClaudeModelName under the hood.
func IsClaudeModel(userName string) bool {
	return ClaudeModelName(userName) != ""
}

// ClaudeModelName returns the Anthropic Claude model name for userName.
// It returns an empty string if userName is not a recognized Claude model.
func ClaudeModelName(userName string) string {
	switch userName {
	case "claude", "sonnet":
		return Claude45Sonnet
	case "opus":
		return Claude45Opus
	default:
		return ""
	}
}

// TokenContextWindow returns the maximum token context window size for this service.
func (s *Service) TokenContextWindow() int {
	model := s.Model
	if model == "" {
		model = DefaultModel
	}

	switch model {
	case Claude37Sonnet, Claude4Sonnet, Claude45Sonnet:
		return 200000
	case Claude45Haiku:
		return 200000
	case Claude45Opus, Claude46Opus:
		return 200000
	default:
		// Default for unknown models
		return 200000
	}
}

// MaxImageDimension returns the maximum allowed image dimension for multi-image requests.
// Anthropic enforces a 2000 pixel limit when multiple images are in a conversation.
func (s *Service) MaxImageDimension() int {
	return 2000
}

// Service provides Claude completions.
// Fields should not be altered concurrently with calling any method on Service.
type Service struct {
	HTTPC         *http.Client      // defaults to http.DefaultClient if nil
	URL           string            // defaults to DefaultURL if empty
	APIKey        string            // must be non-empty
	Model         string            // defaults to DefaultModel if empty
	MaxTokens     int               // defaults to DefaultMaxTokens if zero
	ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables, default is ThinkingLevelMedium)
}

var _ llm.Service = (*Service)(nil)

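// A minimal usage sketch (hypothetical caller code, not part of this package;
// only APIKey is required, everything else falls back to the defaults above):
//
//	svc := &ant.Service{APIKey: os.Getenv(ant.APIKeyEnv)}
//	resp, err := svc.Do(ctx, &llm.Request{
//		Messages: []llm.Message{{
//			Role:    llm.MessageRoleUser,
//			Content: []llm.Content{{Type: llm.ContentTypeText, Text: "hello"}},
//		}},
//	})
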
type content struct {
	// https://docs.anthropic.com/en/api/messages
	ID   string `json:"id,omitempty"`
	Type string `json:"type,omitempty"`

	// Subtly, empty strings often appear in tool results, so we have to
	// distinguish between an empty string and no string at all.
	// The underlying error looks like one of:
	//   "messages.46.content.0.tool_result.content.0.text.text: Field required"
	//   "messages.1.content.1.tool_use.text: Extra inputs are not permitted"
	//
	// I haven't found a great source for the API, but
	// https://github.com/anthropics/anthropic-sdk-typescript/blob/main/src/resources/messages/messages.ts
	// is somewhat usable, if hard to read.
	Text      *string         `json:"text,omitempty"`
	MediaType string          `json:"media_type,omitempty"` // for image
	Source    json.RawMessage `json:"source,omitempty"`     // for image

	// for thinking
	Thinking  string `json:"thinking,omitempty"`
	Data      string `json:"data,omitempty"`      // for redacted_thinking or image
	Signature string `json:"signature,omitempty"` // for thinking

	// for tool_use
	ToolName  string          `json:"name,omitempty"`
	ToolInput json.RawMessage `json:"input,omitempty"`

	// for tool_result
	ToolUseID string `json:"tool_use_id,omitempty"`
	ToolError bool   `json:"is_error,omitempty"`
	// Note the recursive nature here; a message looks like:
	// {
	//  "role": "user",
	//  "content": [
	//    {
	//      "type": "tool_result",
	//      "tool_use_id": "toolu_01A09q90qw90lq917835lq9",
	//      "content": [
	//        {"type": "text", "text": "15 degrees"},
	//        {
	//          "type": "image",
	//          "source": {
	//            "type": "base64",
	//            "media_type": "image/jpeg",
	//            "data": "/9j/4AAQSkZJRg..."
	//          }
	//        }
	//      ]
	//    }
	//  ]
	// }
	ToolResult []content `json:"content,omitempty"`

	// timing information for tool_result; not sent to Claude
	StartTime *time.Time `json:"-"`
	EndTime   *time.Time `json:"-"`

	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}

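// An illustrative note on the Text pointer above (hand-written example, not
// captured API output): omitempty on a *string only skips nil, so an empty
// tool-result string still serializes, which is exactly what the API requires.
//
//	empty := ""
//	c := content{Type: "text", Text: &empty}
//	b, _ := json.Marshal(c)
//	// string(b) == `{"type":"text","text":""}`
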
// message represents a message in the conversation.
type message struct {
	Role    string    `json:"role"`
	Content []content `json:"content"`
	ToolUse *toolUse  `json:"tool_use,omitempty"` // used to control whether/which tool to use
}

// toolUse represents a tool use in the message content.
type toolUse struct {
	ID   string `json:"id"`
	Name string `json:"name"`
}

// tool represents a tool available to Claude.
type tool struct {
	Name string `json:"name"`
	// Type is used by the text editor tool; see
	// https://docs.anthropic.com/en/docs/build-with-claude/tool-use/text-editor-tool
	Type         string          `json:"type,omitempty"`
	Description  string          `json:"description,omitempty"`
	InputSchema  json.RawMessage `json:"input_schema,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}

// usage represents the billing and rate-limit usage.
type usage struct {
	InputTokens              uint64  `json:"input_tokens"`
	CacheCreationInputTokens uint64  `json:"cache_creation_input_tokens"`
	CacheReadInputTokens     uint64  `json:"cache_read_input_tokens"`
	OutputTokens             uint64  `json:"output_tokens"`
	CostUSD                  float64 `json:"cost_usd"`
}

func (u *usage) Add(other usage) {
	u.InputTokens += other.InputTokens
	u.CacheCreationInputTokens += other.CacheCreationInputTokens
	u.CacheReadInputTokens += other.CacheReadInputTokens
	u.OutputTokens += other.OutputTokens
	u.CostUSD += other.CostUSD
}

// response represents the response from the message API.
type response struct {
	ID           string    `json:"id"`
	Type         string    `json:"type"`
	Role         string    `json:"role"`
	Model        string    `json:"model"`
	Content      []content `json:"content"`
	StopReason   string    `json:"stop_reason"`
	StopSequence *string   `json:"stop_sequence,omitempty"`
	Usage        usage     `json:"usage"`
}

type toolChoice struct {
	Type string `json:"type"`
	Name string `json:"name,omitempty"`
}

// https://docs.anthropic.com/en/api/messages#body-system
type systemContent struct {
	Text         string          `json:"text,omitempty"`
	Type         string          `json:"type,omitempty"`
	CacheControl json.RawMessage `json:"cache_control,omitempty"`
}

// thinking configures extended thinking for Claude models.
type thinking struct {
	Type         string `json:"type"`                    // "enabled"
	BudgetTokens int    `json:"budget_tokens,omitempty"` // Max tokens for thinking
}

// request represents the request payload for creating a message.
type request struct {
	// Field order matters for JSON serialization - stable fields should come first
	// to maximize prefix deduplication when storing LLM requests.
	Model         string          `json:"model"`
	MaxTokens     int             `json:"max_tokens"`
	Stream        bool            `json:"stream,omitempty"`
	System        []systemContent `json:"system,omitempty"`
	Tools         []*tool         `json:"tools,omitempty"`
	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
	Thinking      *thinking       `json:"thinking,omitempty"`
	Temperature   float64         `json:"temperature,omitempty"`
	TopK          int             `json:"top_k,omitempty"`
	TopP          float64         `json:"top_p,omitempty"`
	StopSequences []string        `json:"stop_sequences,omitempty"`
	// Messages comes last since it grows with each request in a conversation.
	Messages []message `json:"messages"`
}

func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
	out := make([]T, len(s))
	for i, v := range s {
		out[i] = f(v)
	}
	return out
}

func inverted[K, V cmp.Ordered](m map[K]V) map[V]K {
	inv := make(map[V]K)
	for k, v := range m {
		if _, ok := inv[v]; ok {
			panic(fmt.Errorf("inverted map has multiple keys for value %v", v))
		}
		inv[v] = k
	}
	return inv
}

var (
	fromLLMRole = map[llm.MessageRole]string{
		llm.MessageRoleAssistant: "assistant",
		llm.MessageRoleUser:      "user",
	}
	toLLMRole = inverted(fromLLMRole)

	fromLLMContentType = map[llm.ContentType]string{
		llm.ContentTypeText:             "text",
		llm.ContentTypeThinking:         "thinking",
		llm.ContentTypeRedactedThinking: "redacted_thinking",
		llm.ContentTypeToolUse:          "tool_use",
		llm.ContentTypeToolResult:       "tool_result",
	}
	toLLMContentType = inverted(fromLLMContentType)

	fromLLMToolChoiceType = map[llm.ToolChoiceType]string{
		llm.ToolChoiceTypeAuto: "auto",
		llm.ToolChoiceTypeAny:  "any",
		llm.ToolChoiceTypeNone: "none",
		llm.ToolChoiceTypeTool: "tool",
	}

	toLLMStopReason = map[string]llm.StopReason{
		"stop_sequence": llm.StopReasonStopSequence,
		"max_tokens":    llm.StopReasonMaxTokens,
		"end_turn":      llm.StopReasonEndTurn,
		"tool_use":      llm.StopReasonToolUse,
		"refusal":       llm.StopReasonRefusal,
	}
)

func fromLLMCache(c bool) json.RawMessage {
	if !c {
		return nil
	}
	return json.RawMessage(`{"type":"ephemeral"}`)
}

func fromLLMContent(c llm.Content) content {
	var toolResult []content
	if len(c.ToolResult) > 0 {
		toolResult = make([]content, len(c.ToolResult))
		for i, tr := range c.ToolResult {
			// For image content inside a tool_result, we need to map it to "image" type
			if tr.MediaType == "image/jpeg" || tr.MediaType == "image/png" {
				// Format as an image for Claude
				toolResult[i] = content{
					Type: "image",
					Source: json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
						tr.MediaType, tr.Data)),
				}
			} else {
				toolResult[i] = fromLLMContent(tr)
			}
		}
	}

	d := content{
		Type:         fromLLMContentType[c.Type],
		CacheControl: fromLLMCache(c.Cache),
	}

	// Set fields based on content type to avoid sending invalid fields
	switch c.Type {
	case llm.ContentTypeText:
		// Images are represented as text with MediaType and Data
		if c.MediaType != "" {
			d.Type = "image"
			d.Source = json.RawMessage(fmt.Sprintf(`{"type":"base64","media_type":"%s","data":"%s"}`,
				c.MediaType, c.Data))
		} else {
			d.Text = &c.Text
		}
	case llm.ContentTypeThinking:
		d.Thinking = c.Thinking
		d.Signature = c.Signature
	case llm.ContentTypeRedactedThinking:
		d.Data = c.Data
		d.Signature = c.Signature
	case llm.ContentTypeToolUse:
		d.ID = c.ID
		d.ToolName = c.ToolName
		d.ToolInput = c.ToolInput
	case llm.ContentTypeToolResult:
		d.ToolUseID = c.ToolUseID
		d.ToolError = c.ToolError
		d.ToolResult = toolResult
	}

	return d
}

func fromLLMToolUse(tu *llm.ToolUse) *toolUse {
	if tu == nil {
		return nil
	}
	return &toolUse{
		ID:   tu.ID,
		Name: tu.Name,
	}
}

func fromLLMMessage(msg llm.Message) message {
	return message{
		Role:    fromLLMRole[msg.Role],
		Content: mapped(msg.Content, fromLLMContent),
		ToolUse: fromLLMToolUse(msg.ToolUse),
	}
}

func fromLLMToolChoice(tc *llm.ToolChoice) *toolChoice {
	if tc == nil {
		return nil
	}
	return &toolChoice{
		Type: fromLLMToolChoiceType[tc.Type],
		Name: tc.Name,
	}
}

func fromLLMTool(t *llm.Tool) *tool {
	return &tool{
		Name:         t.Name,
		Type:         t.Type,
		Description:  t.Description,
		InputSchema:  t.InputSchema,
		CacheControl: fromLLMCache(t.Cache),
	}
}

func fromLLMSystem(s llm.SystemContent) systemContent {
	return systemContent{
		Text:         s.Text,
		Type:         s.Type,
		CacheControl: fromLLMCache(s.Cache),
	}
}

func (s *Service) fromLLMRequest(r *llm.Request) *request {
	maxTokens := cmp.Or(s.MaxTokens, DefaultMaxTokens)

	req := &request{
		Model:      cmp.Or(s.Model, DefaultModel),
		Messages:   mapped(r.Messages, fromLLMMessage),
		MaxTokens:  maxTokens,
		ToolChoice: fromLLMToolChoice(r.ToolChoice),
		Tools:      mapped(r.Tools, fromLLMTool),
		System:     mapped(r.System, fromLLMSystem),
	}

	// Enable extended thinking if a thinking level is set
	if s.ThinkingLevel != llm.ThinkingLevelOff {
		budget := s.ThinkingLevel.ThinkingBudgetTokens()
		// Ensure max_tokens > budget_tokens as required by the Anthropic API
		if maxTokens <= budget {
			req.MaxTokens = budget + 1024
		}
		req.Thinking = &thinking{Type: "enabled", BudgetTokens: budget}
	}
	return req
}

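// For reference, an illustrative request body when thinking is enabled
// (hand-written, not captured; assumes a budget of 8192 tokens for the
// example, so the default max_tokens of 8192 is bumped to budget+1024):
//
//	{
//	  "model": "claude-sonnet-4-5-20250929",
//	  "max_tokens": 9216,
//	  "thinking": {"type": "enabled", "budget_tokens": 8192},
//	  "messages": [...]
//	}
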
func toLLMUsage(u usage) llm.Usage {
	return llm.Usage{
		InputTokens:              u.InputTokens,
		CacheCreationInputTokens: u.CacheCreationInputTokens,
		CacheReadInputTokens:     u.CacheReadInputTokens,
		OutputTokens:             u.OutputTokens,
		CostUSD:                  u.CostUSD,
	}
}

func toLLMContent(c content) llm.Content {
	// Convert ToolResult from []content to []llm.Content
	var toolResultContents []llm.Content
	if len(c.ToolResult) > 0 {
		toolResultContents = make([]llm.Content, len(c.ToolResult))
		for i, tr := range c.ToolResult {
			toolResultContents[i] = toLLMContent(tr)
		}
	}

	ret := llm.Content{
		ID:         c.ID,
		Type:       toLLMContentType[c.Type],
		MediaType:  c.MediaType,
		Thinking:   c.Thinking,
		Data:       c.Data,
		Signature:  c.Signature,
		ToolName:   c.ToolName,
		ToolInput:  c.ToolInput,
		ToolUseID:  c.ToolUseID,
		ToolError:  c.ToolError,
		ToolResult: toolResultContents,
	}
	if c.Text != nil {
		ret.Text = *c.Text
	}
	return ret
}

func toLLMResponse(r *response) *llm.Response {
	return &llm.Response{
		ID:           r.ID,
		Type:         r.Type,
		Role:         toLLMRole[r.Role],
		Model:        r.Model,
		Content:      mapped(r.Content, toLLMContent),
		StopReason:   toLLMStopReason[r.StopReason],
		StopSequence: r.StopSequence,
		Usage:        toLLMUsage(r.Usage),
	}
}

// Do sends a request to Anthropic.
func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error) {
	startTime := time.Now()
	request := s.fromLLMRequest(ir)
	payload, err := json.Marshal(request)
	if err != nil {
		return nil, err
	}
	payload = append(payload, '\n')

	backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}

	url := cmp.Or(s.URL, DefaultURL)
	httpc := cmp.Or(s.HTTPC, http.DefaultClient)

	// retry loop
	var errs error // accumulated errors across all attempts
	for attempts := 0; ; attempts++ {
		if attempts > 10 {
			return nil, fmt.Errorf("anthropic request failed after %d attempts: %w", attempts, errs)
		}
		if attempts > 0 {
			// Index with attempts-1 so the first retry uses the shortest backoff.
			sleep := backoff[min(attempts-1, len(backoff)-1)] + time.Duration(rand.Int64N(int64(time.Second)))
			slog.WarnContext(ctx, "anthropic request sleep before retry", "sleep", sleep, "attempts", attempts)
			time.Sleep(sleep)
		}
		req, err := http.NewRequestWithContext(ctx, "POST", url, bytes.NewReader(payload))
		if err != nil {
			return nil, errors.Join(errs, err)
		}

		req.Header.Set("Content-Type", "application/json")
		req.Header.Set("X-API-Key", s.APIKey)
		req.Header.Set("Anthropic-Version", "2023-06-01")

		resp, err := httpc.Do(req)
		if err != nil {
			// Don't retry httprr cache misses
			if strings.Contains(err.Error(), "cached HTTP response not found") {
				return nil, err
			}
			errs = errors.Join(errs, err)
			continue
		}
		buf, err := io.ReadAll(resp.Body)
		resp.Body.Close()
		if err != nil {
			errs = errors.Join(errs, err)
			continue
		}

		switch {
		case resp.StatusCode == http.StatusOK:
			var response response
			err = json.NewDecoder(bytes.NewReader(buf)).Decode(&response)
			if err != nil {
				return nil, errors.Join(errs, err)
			}
			// Calculate and set the cost_usd field
			response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header)

			endTime := time.Now()
			result := toLLMResponse(&response)
			result.StartTime = &startTime
			result.EndTime = &endTime
			return result, nil
		case resp.StatusCode >= 500 && resp.StatusCode < 600:
			// server error, retry
			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
			continue
		case resp.StatusCode == 429:
			// rate limited, retry
			slog.WarnContext(ctx, "anthropic_request_rate_limited", "response", string(buf), "url", url, "model", s.Model)
			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
			continue
		case resp.StatusCode >= 400 && resp.StatusCode < 500:
			// some other 4xx, probably unrecoverable
			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
			return nil, errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
		default:
			// ...retry, I guess?
			slog.WarnContext(ctx, "anthropic_request_failed", "response", string(buf), "status_code", resp.StatusCode, "url", url, "model", s.Model)
			errs = errors.Join(errs, fmt.Errorf("status %v (url=%s, model=%s): %s", resp.Status, url, cmp.Or(s.Model, DefaultModel), buf))
			continue
		}
	}
}

// For debugging only; Claude can definitely handle the full patch tool.
// func (s *Service) UseSimplifiedPatch() bool {
// 	return true
// }

// ConfigDetails returns configuration information for logging.
func (s *Service) ConfigDetails() map[string]string {
	model := cmp.Or(s.Model, DefaultModel)
	url := cmp.Or(s.URL, DefaultURL)
	return map[string]string{
		"url":             url,
		"model":           model,
		"has_api_key_set": fmt.Sprintf("%v", s.APIKey != ""),
	}
}