Detailed changes
@@ -107,6 +107,9 @@ installs, tests, or any other substantive operation.
To change the working directory persistently, use the change_dir tool.
+IMPORTANT: Keep commands concise. The command input must be less than 60k tokens.
+For complex scripts, write them to a file first and then execute the file.
+
<pwd>%s</pwd>
`
// If you modify this, update the termui template for prettier rendering.
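As an aside, the write-then-execute pattern the prompt asks for can be sketched in Go (a hedged illustration only; in practice the model emits the equivalent bash itself, and the script contents and file name here are hypothetical):

package main

import (
	"fmt"
	"os"
	"os/exec"
	"path/filepath"
)

func main() {
	// Persist a long script and run the file instead of inlining it,
	// keeping the command input sent to the tool small.
	script := "set -euo pipefail\necho step 1\necho step 2\n"
	path := filepath.Join(os.TempDir(), "task.sh")
	if err := os.WriteFile(path, []byte(script), 0o700); err != nil {
		panic(err)
	}
	out, err := exec.Command("bash", path).CombinedOutput()
	fmt.Printf("err=%v\n%s", err, out)
}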
@@ -103,6 +103,10 @@ Recipes:
Usage notes:
- All inputs are interpreted literally (no automatic newline or whitespace handling)
- For replace operations, oldText must appear EXACTLY ONCE in the file
+
+IMPORTANT: Each patch call must be less than 60k tokens total. For large file
+changes, break the work into multiple smaller patch calls, preferring
+incremental replace operations over a single full-file overwrite.
`
// If you modify this, update the termui template for prettier rendering.
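To make "incremental replace operations" concrete, here is a hedged sketch of tool input that splits a change into two small patches instead of one overwrite ("operation" and "oldText" come from the usage notes above; "newText" and the concrete values are assumptions for illustration):

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// Two targeted replaces; each oldText must occur exactly once in the file.
	msg := json.RawMessage(`{
		"path": "server/dashboard.go",
		"patches": [
			{"operation": "replace", "oldText": "const pageSize = 10", "newText": "const pageSize = 25"},
			{"operation": "replace", "oldText": "sortByName(rows)", "newText": "sortByDate(rows)"}
		]
	}`)
	var v struct {
		Patches []json.RawMessage `json:"patches"`
	}
	fmt.Println(json.Unmarshal(msg, &v), len(v.Patches)) // <nil> 2
}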
@@ -317,7 +321,7 @@ func (p *PatchTool) patchParse(m json.RawMessage) (PatchInput, error) {
originalErr = err
}
var inputOneString PatchInputOneString
- if err := json.Unmarshal(m, &inputOneString); err == nil {
+ if err := json.Unmarshal(m, &inputOneString); err == nil && inputOneString.Patches != "" {
var onePatch PatchRequest
if err := json.Unmarshal([]byte(inputOneString.Patches), &onePatch); err == nil && onePatch.Operation != "" {
return PatchInput{Path: inputOneString.Path, Patches: []PatchRequest{onePatch}}, nil
@@ -331,6 +335,10 @@ func (p *PatchTool) patchParse(m json.RawMessage) (PatchInput, error) {
originalErr = err
}
}
+ // If JSON parsed but patches field was missing/empty, provide a clear error
+ if originalErr == nil {
+ return PatchInput{}, fmt.Errorf("patches field is missing or empty (this may indicate a truncated LLM response)\nJSON: %s", string(m))
+ }
return PatchInput{}, fmt.Errorf("failed to unmarshal patch input: %w\nJSON: %s", originalErr, string(m))
}
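The guard above is needed because encoding/json leaves absent fields zero-valued instead of reporting an error; a minimal standalone sketch of the failure mode:

package main

import (
	"encoding/json"
	"fmt"
)

func main() {
	// A truncated response like {"path": ...} still unmarshals cleanly, so
	// without the Patches != "" check it passed as a valid one-string input
	// with an empty patch payload.
	var in struct {
		Path    string `json:"path"`
		Patches string `json:"patches"`
	}
	err := json.Unmarshal([]byte(`{"path":"server/dashboard.go"}`), &in)
	fmt.Println(err, in.Patches == "") // <nil> true
}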
@@ -308,6 +308,23 @@ func TestPatchTool_ErrorCases(t *testing.T) {
if result.Error == nil || !strings.Contains(result.Error.Error(), "clipboard") {
t.Error("expected clipboard error")
}
+
+ // Test missing patches field (simulates a truncated LLM response)
+ msg = json.RawMessage(`{"path":"server/dashboard.go"}`)
+ result = patch.Run(ctx, msg)
+ if result.Error == nil {
+ t.Error("expected error for missing patches field")
+ }
+ if !strings.Contains(result.Error.Error(), "missing or empty") {
+ t.Errorf("expected 'missing or empty' in error, got: %v", result.Error)
+ }
+
+ // Test empty patches array
+ msg = json.RawMessage(`{"path":"server/dashboard.go","patches":[]}`)
+ result = patch.Run(ctx, msg)
+ if result.Error == nil {
+ t.Error("expected error for empty patches array")
+ }
}
func TestPatchTool_FlexibleInputParsing(t *testing.T) {
@@ -236,7 +236,6 @@ type request struct {
TopP float64 `json:"top_p,omitempty"`
StopSequences []string `json:"stop_sequences,omitempty"`
- TokenEfficientToolUse bool `json:"-"` // DO NOT USE, broken on Anthropic's side as of 2025-02-28
}
func mapped[Slice ~[]E, E, T any](s Slice, f func(E) T) []T {
@@ -481,8 +481,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
}
backoff := []time.Duration{15 * time.Second, 30 * time.Second, time.Minute}
- largerMaxTokens := false
- var partialUsage usage
url := cmp.Or(s.URL, DefaultURL)
httpc := cmp.Or(s.HTTPC, http.DefaultClient)
@@ -522,18 +520,6 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
req.Header.Set("X-API-Key", s.APIKey)
req.Header.Set("Anthropic-Version", "2023-06-01")
- var features []string
- if request.TokenEfficientToolUse {
- features = append(features, "token-efficient-tool-use-2025-02-19")
- }
- if largerMaxTokens {
- features = append(features, "output-128k-2025-02-19")
- request.MaxTokens = 128 * 1024
- }
- if len(features) > 0 {
- req.Header.Set("anthropic-beta", strings.Join(features, ","))
- }
-
resp, err := httpc.Do(req)
if err != nil {
// Don't retry httprr cache misses
@@ -566,19 +552,7 @@ func (s *Service) Do(ctx context.Context, ir *llm.Request) (*llm.Response, error
if err != nil {
return nil, errors.Join(errs, err)
}
- if response.StopReason == "max_tokens" && !largerMaxTokens {
- slog.InfoContext(ctx, "anthropic_retrying_with_larger_tokens", "message", "Retrying Anthropic API call with larger max tokens size")
- // Retry with more output tokens.
- largerMaxTokens = true
- response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header)
- partialUsage = response.Usage
- continue
- }
-
// Calculate and set the cost_usd field
- if largerMaxTokens {
- response.Usage.Add(partialUsage)
- }
response.Usage.CostUSD = llm.CostUSDFromResponse(resp.Header)
endTime := time.Now()
@@ -288,6 +288,12 @@ func (l *Loop) processLLMRequest(ctx context.Context) error {
return l.handleToolCalls(ctx, resp.Content)
}
+ // Handle max tokens truncation - record an error message for the user
+ if resp.StopReason == llm.StopReasonMaxTokens {
+ l.logger.Warn("LLM response truncated due to max tokens")
+ return l.handleMaxTokensTruncation(ctx)
+ }
+
// End of turn - check for git state changes
l.checkGitStateChange(ctx)
@@ -331,6 +337,37 @@ func (l *Loop) checkGitStateChange(ctx context.Context) {
}
}
+// handleMaxTokensTruncation handles the case where the LLM response was truncated
+// due to hitting the maximum output token limit. It records an error message
+// informing the user and instructing the LLM to use smaller outputs.
+func (l *Loop) handleMaxTokensTruncation(ctx context.Context) error {
+ errorMessage := llm.Message{
+ Role: llm.MessageRoleUser,
+ Content: []llm.Content{
+ {
+ Type: llm.ContentTypeText,
+ Text: "[SYSTEM ERROR: Your previous response was truncated because it exceeded the maximum output token limit. " +
+ "Any tool calls in that response were lost. Please retry with smaller, incremental changes. " +
+ "For file operations, break large changes into multiple smaller patches. " +
+ "The user can ask you to continue if needed.]",
+ },
+ },
+ }
+
+ l.mu.Lock()
+ l.history = append(l.history, errorMessage)
+ l.mu.Unlock()
+
+ // Record the error message
+ if err := l.recordMessage(ctx, errorMessage, llm.Usage{}); err != nil {
+ l.logger.Error("failed to record truncation error message", "error", err)
+ }
+
+ // End the turn - don't automatically continue
+ l.checkGitStateChange(ctx)
+ return nil
+}
+
// handleToolCalls processes tool calls from the LLM response
func (l *Loop) handleToolCalls(ctx context.Context, content []llm.Content) error {
var toolResults []llm.Content
@@ -7,6 +7,7 @@ import (
"os"
"os/exec"
"path/filepath"
+ "strings"
"sync"
"testing"
"time"
@@ -1164,3 +1165,77 @@ func runGit(t *testing.T, dir string, args ...string) {
t.Fatalf("git %v failed: %v\n%s", args, err, output)
}
}
+
+func TestMaxTokensTruncation(t *testing.T) {
+ var recordedMessages []llm.Message
+ var mu sync.Mutex
+
+ recordFunc := func(ctx context.Context, message llm.Message, usage llm.Usage) error {
+ mu.Lock()
+ defer mu.Unlock()
+ recordedMessages = append(recordedMessages, message)
+ return nil
+ }
+
+ service := NewPredictableService()
+ loop := NewLoop(Config{
+ LLM: service,
+ History: []llm.Message{},
+ Tools: []*llm.Tool{},
+ RecordMessage: recordFunc,
+ })
+
+ // Queue a user message that triggers max_tokens response
+ userMessage := llm.Message{
+ Role: llm.MessageRoleUser,
+ Content: []llm.Content{{Type: llm.ContentTypeText, Text: "maxTokens"}},
+ }
+ loop.QueueUserMessage(userMessage)
+
+ // Process the turn - should end with error message about truncation
+ ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
+ defer cancel()
+
+ err := loop.ProcessOneTurn(ctx)
+ if err != nil {
+ t.Fatalf("ProcessOneTurn failed: %v", err)
+ }
+
+ // Check that messages were recorded:
+ // 1. First assistant message (truncated)
+ // 2. User error message about truncation
+ mu.Lock()
+ numMessages := len(recordedMessages)
+ mu.Unlock()
+
+ if numMessages != 2 {
+ mu.Lock()
+ for i, msg := range recordedMessages {
+ t.Logf("Message %d: role=%v, content=%v", i, msg.Role, msg.Content)
+ }
+ mu.Unlock()
+ t.Fatalf("expected 2 recorded messages (truncated response, error message), got %d", numMessages)
+ }
+
+ // Verify the first message was the truncated assistant response
+ mu.Lock()
+ firstMsg := recordedMessages[0]
+ mu.Unlock()
+ if firstMsg.Role != llm.MessageRoleAssistant {
+ t.Errorf("expected first message to be assistant, got %v", firstMsg.Role)
+ }
+
+ // Verify the second message is the error/system message about truncation
+ mu.Lock()
+ secondMsg := recordedMessages[1]
+ mu.Unlock()
+ if secondMsg.Role != llm.MessageRoleUser {
+ t.Errorf("expected second message to be user (system error), got %v", secondMsg.Role)
+ }
+ if !strings.Contains(secondMsg.Content[0].Text, "truncated") {
+ t.Errorf("expected error message to mention truncation, got %q", secondMsg.Content[0].Text)
+ }
+ if !strings.Contains(secondMsg.Content[0].Text, "smaller") {
+ t.Errorf("expected error message to suggest smaller changes, got %q", secondMsg.Content[0].Text)
+ }
+}
@@ -124,6 +124,10 @@ func (s *PredictableService) Do(ctx context.Context, req *llm.Request) (*llm.Res
// Trigger a patch with malformed JSON (simulates Anthropic sending invalid JSON)
return s.makeMalformedPatchToolResponse(inputTokens), nil
+ case "maxTokens":
+ // Simulate a max_tokens truncation
+ return s.makeMaxTokensResponse("This is a truncated response that was cut off mid-sentence because the output token limit was", inputTokens), nil
+
default:
// Handle pattern-based inputs
if strings.HasPrefix(inputText, "echo: ") {
@@ -175,6 +179,29 @@ func (s *PredictableService) Do(ctx context.Context, req *llm.Request) (*llm.Res
}
}
+// makeMaxTokensResponse creates a response that simulates hitting the max_tokens limit
+func (s *PredictableService) makeMaxTokensResponse(text string, inputTokens uint64) *llm.Response {
+ outputTokens := uint64(len(text) / 4)
+ if outputTokens == 0 {
+ outputTokens = 1
+ }
+ return &llm.Response{
+ ID: fmt.Sprintf("pred-%d", time.Now().UnixNano()),
+ Type: "message",
+ Role: llm.MessageRoleAssistant,
+ Model: "predictable-v1",
+ Content: []llm.Content{
+ {Type: llm.ContentTypeText, Text: text},
+ },
+ StopReason: llm.StopReasonMaxTokens,
+ Usage: llm.Usage{
+ InputTokens: inputTokens,
+ OutputTokens: outputTokens,
+ CostUSD: 0.001,
+ },
+ }
+}
+
// makeResponse creates a simple text response
func (s *PredictableService) makeResponse(text string, inputTokens uint64) *llm.Response {
outputTokens := uint64(len(text) / 4) // ~4 chars per token