From d9c92218834c02af90de444c55c982d811c0c450 Mon Sep 17 00:00:00 2001 From: Philip Zeyliger Date: Thu, 5 Feb 2026 06:14:05 +0000 Subject: [PATCH] shelley: Replace think tool with unified ThinkingLevel API Prompt: Get rid of the think tool in shelley; enable thinking in the anthropic (and other) models. Make the ui show thinking steps. Continue: squash commits, unify thinking approach across providers. Changes: - Remove the think tool from claudetool (think.go, think_test.go) - Add ThinkingLevel type to llm package with levels: Off, Minimal, Low, Medium, High - ThinkingLevel.ThinkingBudgetTokens() maps to Anthropic budget_tokens: Minimal=1024, Low=2048, Medium=8192, High=16384 - ThinkingLevel.ThinkingEffort() maps to OpenAI reasoning.effort string - ThinkingLevelOff is the zero value, so existing code continues to work - Add ThinkingLevel field to ant.Service and oai.ResponsesService - Set ThinkingLevel=Medium for all production Anthropic and OpenAI services - Update UI with ThinkingContent component to display thinking blocks - Thinking is expanded by default, shows preview in header, full text below - Remove ThinkTool.tsx component (no longer needed) This unifies the thinking/reasoning approach across providers following pi-mono's pattern of using effort levels rather than raw token budgets. 
Co-authored-by: Shelley --- claudetool/think.go | 39 ----------- claudetool/think_test.go | 34 ---------- claudetool/toolset.go | 1 - llm/ant/ant.go | 35 ++++++++-- llm/llm.go | 47 +++++++++++++- llm/llm_string.go | 24 ++++++- llm/oai/oai_responses.go | 23 +++++-- loop/loop_test.go | 32 ++++----- loop/predictable.go | 38 ++++------- models/models.go | 22 ++++--- server/custom_models.go | 7 +- test/server_test.go | 8 +-- ui/src/components/ChatInterface.tsx | 13 ++-- ui/src/components/Message.tsx | 50 +++++++------- ui/src/components/ThinkTool.tsx | 94 --------------------------- ui/src/components/ThinkingContent.tsx | 90 +++++++++++++++++++++++++ 16 files changed, 281 insertions(+), 276 deletions(-) delete mode 100644 claudetool/think.go delete mode 100644 claudetool/think_test.go delete mode 100644 ui/src/components/ThinkTool.tsx create mode 100644 ui/src/components/ThinkingContent.tsx diff --git a/claudetool/think.go b/claudetool/think.go deleted file mode 100644 index 11e94e3c8c70980b1ddf79b996d698232ca4d591..0000000000000000000000000000000000000000 --- a/claudetool/think.go +++ /dev/null @@ -1,39 +0,0 @@ -package claudetool - -import ( - "context" - "encoding/json" - - "shelley.exe.dev/llm" -) - -// The Think tool provides space to think. -var Think = &llm.Tool{ - Name: thinkName, - Description: thinkDescription, - InputSchema: llm.MustSchema(thinkInputSchema), - Run: thinkRun, -} - -const ( - thinkName = "think" - thinkDescription = `Think out loud, take notes, form plans. Has no external effects.` - - // If you modify this, update the termui template for prettier rendering. 
- thinkInputSchema = ` -{ - "type": "object", - "required": ["thoughts"], - "properties": { - "thoughts": { - "type": "string", - "description": "The thoughts, notes, or plans to record" - } - } -} -` -) - -func thinkRun(ctx context.Context, m json.RawMessage) llm.ToolOut { - return llm.ToolOut{LLMContent: llm.TextContent("recorded")} -} diff --git a/claudetool/think_test.go b/claudetool/think_test.go deleted file mode 100644 index b2d9c431c049b3e22c3c5ae444d870bed8c99cad..0000000000000000000000000000000000000000 --- a/claudetool/think_test.go +++ /dev/null @@ -1,34 +0,0 @@ -package claudetool - -import ( - "context" - "encoding/json" - "testing" -) - -func TestThinkRun(t *testing.T) { - input := struct { - Thoughts string `json:"thoughts"` - }{ - Thoughts: "This is a test thought", - } - - inputBytes, err := json.Marshal(input) - if err != nil { - t.Fatal(err) - } - - result := thinkRun(context.Background(), inputBytes) - - if result.Error != nil { - t.Errorf("unexpected error: %v", result.Error) - } - - if len(result.LLMContent) == 0 { - t.Error("expected LLM content") - } - - if result.LLMContent[0].Text != "recorded" { - t.Errorf("expected 'recorded', got %q", result.LLMContent[0].Text) - } -} diff --git a/claudetool/toolset.go b/claudetool/toolset.go index a8f4f05d33570c233c5a955938ff5eed29fe3d65..8ae43471bedb544525e891867ac33fccf6e94f4c 100644 --- a/claudetool/toolset.go +++ b/claudetool/toolset.go @@ -126,7 +126,6 @@ func NewToolSet(ctx context.Context, cfg ToolSetConfig) *ToolSet { outputIframeTool := &OutputIframeTool{WorkingDir: wd} tools := []*llm.Tool{ - Think, bashTool.Tool(), patchTool.Tool(), keywordTool.Tool(), diff --git a/llm/ant/ant.go b/llm/ant/ant.go index aa3929b7ebc47c4e6efa1f1e06bfc051c2f4ef0f..9d7bdd0d51c253ab78a36260ba9b69459c8d1d9a 100644 --- a/llm/ant/ant.go +++ b/llm/ant/ant.go @@ -82,11 +82,12 @@ func (s *Service) MaxImageDimension() int { // Service provides Claude completions. 
// Fields should not be altered concurrently with calling any method on Service. type Service struct { - HTTPC *http.Client // defaults to http.DefaultClient if nil - URL string // defaults to DefaultURL if empty - APIKey string // must be non-empty - Model string // defaults to DefaultModel if empty - MaxTokens int // defaults to DefaultMaxTokens if zero + HTTPC *http.Client // defaults to http.DefaultClient if nil + URL string // defaults to DefaultURL if empty + APIKey string // must be non-empty + Model string // defaults to DefaultModel if empty + MaxTokens int // defaults to DefaultMaxTokens if zero + ThinkingLevel llm.ThinkingLevel // thinking level; the zero value ThinkingLevelOff disables thinking, so set a level explicitly to enable it } var _ llm.Service = (*Service)(nil) @@ -217,6 +218,12 @@ type systemContent struct { } // request represents the request payload for creating a message. +// thinking configures extended thinking for Claude models. +type thinking struct { + Type string `json:"type"` // "enabled" + BudgetTokens int `json:"budget_tokens,omitempty"` // Max tokens for thinking +} + type request struct { // Field order matters for JSON serialization - stable fields should come first // to maximize prefix deduplication when storing LLM requests. 
@@ -226,6 +233,7 @@ type request struct { System []systemContent `json:"system,omitempty"` Tools []*tool `json:"tools,omitempty"` ToolChoice *toolChoice `json:"tool_choice,omitempty"` + Thinking *thinking `json:"thinking,omitempty"` Temperature float64 `json:"temperature,omitempty"` TopK int `json:"top_k,omitempty"` TopP float64 `json:"top_p,omitempty"` @@ -393,14 +401,27 @@ func fromLLMSystem(s llm.SystemContent) systemContent { } func (s *Service) fromLLMRequest(r *llm.Request) *request { - return &request{ + maxTokens := cmp.Or(s.MaxTokens, DefaultMaxTokens) + + req := &request{ Model: cmp.Or(s.Model, DefaultModel), Messages: mapped(r.Messages, fromLLMMessage), - MaxTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens), + MaxTokens: maxTokens, ToolChoice: fromLLMToolChoice(r.ToolChoice), Tools: mapped(r.Tools, fromLLMTool), System: mapped(r.System, fromLLMSystem), } + + // Enable extended thinking if a thinking level is set + if s.ThinkingLevel != llm.ThinkingLevelOff { + budget := s.ThinkingLevel.ThinkingBudgetTokens() + // Ensure max_tokens > budget_tokens as required by Anthropic API + if maxTokens <= budget { + req.MaxTokens = budget + 1024 + } + req.Thinking = &thinking{Type: "enabled", BudgetTokens: budget} + } + return req } func toLLMUsage(u usage) llm.Usage { diff --git a/llm/llm.go b/llm/llm.go index 3e2c1c21e421acc5eb423fcd276c71abf1922456..144d645e2da9699ef94b175858a8510bba453d81 100644 --- a/llm/llm.go +++ b/llm/llm.go @@ -198,7 +198,7 @@ func ContentsAttr(contents []Content) slog.Attr { attrs = append(attrs, slog.Any("tool_result", content.ToolResult)) attrs = append(attrs, slog.Bool("tool_error", content.ToolError)) case ContentTypeThinking: - attrs = append(attrs, slog.String("thinking", content.Text)) + attrs = append(attrs, slog.String("thinking", content.Thinking)) default: attrs = append(attrs, slog.String("unknown_content_type", content.Type.String())) attrs = append(attrs, slog.Any("text", content)) // just log it all raw, better to have too much 
than not enough @@ -213,9 +213,10 @@ type ( ContentType int ToolChoiceType int StopReason int + ThinkingLevel int ) -//go:generate go tool golang.org/x/tools/cmd/stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason -output=llm_string.go +//go:generate go tool golang.org/x/tools/cmd/stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason,ThinkingLevel -output=llm_string.go const ( MessageRoleUser MessageRole = iota @@ -239,6 +240,48 @@ const ( StopReasonRefusal ) +// ThinkingLevel controls how much thinking/reasoning the model does. +// ThinkingLevelOff is the zero value and disables thinking. +const ( + ThinkingLevelOff ThinkingLevel = iota // No thinking (zero value) + ThinkingLevelMinimal // Minimal thinking (1024 tokens / "minimal") + ThinkingLevelLow // Low thinking (2048 tokens / "low") + ThinkingLevelMedium // Medium thinking (8192 tokens / "medium") + ThinkingLevelHigh // High thinking (16384 tokens / "high") +) + +// ThinkingBudgetTokens returns the recommended budget_tokens for Anthropic's extended thinking. +func (t ThinkingLevel) ThinkingBudgetTokens() int { + switch t { + case ThinkingLevelMinimal: + return 1024 + case ThinkingLevelLow: + return 2048 + case ThinkingLevelMedium: + return 8192 + case ThinkingLevelHigh: + return 16384 + default: + return 0 + } +} + +// ThinkingEffort returns the reasoning effort string for OpenAI's reasoning API. 
+func (t ThinkingLevel) ThinkingEffort() string { + switch t { + case ThinkingLevelMinimal: + return "minimal" + case ThinkingLevelLow: + return "low" + case ThinkingLevelMedium: + return "medium" + case ThinkingLevelHigh: + return "high" + default: + return "" + } +} + type Response struct { ID string Type string diff --git a/llm/llm_string.go b/llm/llm_string.go index bfc8690ee2a8990e2907d16e4c60702a8f923e44..41490b6bda96922fb329eab46269bd75caa1b022 100644 --- a/llm/llm_string.go +++ b/llm/llm_string.go @@ -1,4 +1,4 @@ -// Code generated by "stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason -output=llm_string.go"; DO NOT EDIT. +// Code generated by "stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason,ThinkingLevel -output=llm_string.go"; DO NOT EDIT. package llm @@ -88,3 +88,25 @@ func (i StopReason) String() string { } return _StopReason_name[_StopReason_index[idx]:_StopReason_index[idx+1]] } +func _() { + // An "invalid array index" compiler error signifies that the constant values have changed. + // Re-run the stringer command to generate them again. 
+ var x [1]struct{} + _ = x[ThinkingLevelOff-0] + _ = x[ThinkingLevelMinimal-1] + _ = x[ThinkingLevelLow-2] + _ = x[ThinkingLevelMedium-3] + _ = x[ThinkingLevelHigh-4] +} + +const _ThinkingLevel_name = "ThinkingLevelOffThinkingLevelMinimalThinkingLevelLowThinkingLevelMediumThinkingLevelHigh" + +var _ThinkingLevel_index = [...]uint8{0, 16, 36, 52, 71, 88} + +func (i ThinkingLevel) String() string { + idx := int(i) - 0 + if i < 0 || idx >= len(_ThinkingLevel_index)-1 { + return "ThinkingLevel(" + strconv.FormatInt(int64(i), 10) + ")" + } + return _ThinkingLevel_name[_ThinkingLevel_index[idx]:_ThinkingLevel_index[idx+1]] +} diff --git a/llm/oai/oai_responses.go b/llm/oai/oai_responses.go index 9b58ac9449761191785c6b309b42322a6075b97d..4b7808344e2b145374d0b08a7724337d6b64a552 100644 --- a/llm/oai/oai_responses.go +++ b/llm/oai/oai_responses.go @@ -21,13 +21,14 @@ import ( // This API is required for models like gpt-5.1-codex. // Fields should not be altered concurrently with calling any method on ResponsesService. 
type ResponsesService struct { - HTTPC *http.Client // defaults to http.DefaultClient if nil - APIKey string // optional, if not set will try to load from env var - Model Model // defaults to DefaultModel if zero value - ModelURL string // optional, overrides Model.URL - MaxTokens int // defaults to DefaultMaxTokens if zero - Org string // optional - organization ID - DumpLLM bool // whether to dump request/response text to files for debugging; defaults to false + HTTPC *http.Client // defaults to http.DefaultClient if nil + APIKey string // optional, if not set will try to load from env var + Model Model // defaults to DefaultModel if zero value + ModelURL string // optional, overrides Model.URL + MaxTokens int // defaults to DefaultMaxTokens if zero + Org string // optional - organization ID + DumpLLM bool // whether to dump request/response text to files for debugging; defaults to false + ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables reasoning) } var _ llm.Service = (*ResponsesService)(nil) @@ -391,6 +392,14 @@ func (s *ResponsesService) Do(ctx context.Context, ir *llm.Request) (*llm.Respon MaxOutputTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens), } + // Add reasoning if thinking is enabled + if s.ThinkingLevel != llm.ThinkingLevelOff { + effort := s.ThinkingLevel.ThinkingEffort() + if effort != "" { + req.Reasoning = &responsesReasoning{Effort: effort} + } + } + // Add tool choice if specified if ir.ToolChoice != nil { req.ToolChoice = fromLLMToolChoice(ir.ToolChoice) diff --git a/loop/loop_test.go b/loop/loop_test.go index 86f7d9c856076cc94db6f6db943e99cd8916639d..f64319608ebffaed60243f5b543caeed691f535d 100644 --- a/loop/loop_test.go +++ b/loop/loop_test.go @@ -1183,7 +1183,7 @@ func TestPredictableServiceMaxImageDimension(t *testing.T) { } } -func TestPredictableServiceThinkTool(t *testing.T) { +func TestPredictableServiceThinking(t *testing.T) { service := NewPredictableService() ctx := context.Background() @@ -1195,34 +1195,30 
@@ func TestPredictableServiceThinkTool(t *testing.T) { resp, err := service.Do(ctx, req) if err != nil { - t.Fatalf("think tool test failed: %v", err) + t.Fatalf("thinking test failed: %v", err) } - if resp.StopReason != llm.StopReasonToolUse { - t.Errorf("expected tool use stop reason, got %v", resp.StopReason) + // Now returns EndTurn since thinking is content, not a tool + if resp.StopReason != llm.StopReasonEndTurn { + t.Errorf("expected end turn stop reason, got %v", resp.StopReason) } - // Find the tool use content - var toolUseContent *llm.Content + // Find the thinking content + var thinkingContent *llm.Content for _, content := range resp.Content { - if content.Type == llm.ContentTypeToolUse && content.ToolName == "think" { - toolUseContent = &content + if content.Type == llm.ContentTypeThinking { + thinkingContent = &content break } } - if toolUseContent == nil { - t.Fatal("no think tool use content found") - } - - // Check tool input contains the thoughts - var toolInput map[string]interface{} - if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil { - t.Fatalf("failed to parse tool input: %v", err) + if thinkingContent == nil { + t.Fatal("no thinking content found") } - if toolInput["thoughts"] != "This is a test thought" { - t.Errorf("expected thoughts 'This is a test thought', got '%v'", toolInput["thoughts"]) + // Check thinking content contains the thoughts + if thinkingContent.Thinking != "This is a test thought" { + t.Errorf("expected thinking 'This is a test thought', got '%v'", thinkingContent.Thinking) } } diff --git a/loop/predictable.go b/loop/predictable.go index 4a0f062b8a283fbc18548dbca6fa6405c03f45cd..2a9ffebbea4fc579b948d921bf8ab2fa18348ca7 100644 --- a/loop/predictable.go +++ b/loop/predictable.go @@ -20,7 +20,7 @@ import ( // Available patterns include: // - "echo: " - echoes the text back // - "bash: " - triggers bash tool with command -// - "think: " - triggers think tool +// - "think: " - returns response with 
extended thinking content // - "subagent: " - triggers subagent tool // - "change_dir: " - triggers change_dir tool // - "delay: " - delays response by specified seconds @@ -112,7 +112,7 @@ func (s *PredictableService) Do(ctx context.Context, req *llm.Request) (*llm.Res return s.makeResponse("Hello! I'm Shelley, your AI assistant. How can I help you today?", inputTokens), nil case "Create an example": - return s.makeThinkToolResponse("I'll create a simple example for you.", inputTokens), nil + return s.makeThinkingResponse("I'll create a simple example for you.", inputTokens), nil case "screenshot": // Trigger a screenshot of the current page @@ -155,7 +155,7 @@ func (s *PredictableService) Do(ctx context.Context, req *llm.Request) (*llm.Res if strings.HasPrefix(inputText, "think: ") { thoughts := strings.TrimPrefix(inputText, "think: ") - return s.makeThinkToolResponse(thoughts, inputTokens), nil + return s.makeThinkingResponse(thoughts, inputTokens), nil } if strings.HasPrefix(inputText, "patch: ") { @@ -288,32 +288,23 @@ func (s *PredictableService) makeBashToolResponse(command string, inputTokens ui } } -// makeThinkToolResponse creates a response that calls the think tool -func (s *PredictableService) makeThinkToolResponse(thoughts string, inputTokens uint64) *llm.Response { - // Properly marshal the thoughts to avoid JSON escaping issues - toolInputData := map[string]string{"thoughts": thoughts} - toolInputBytes, _ := json.Marshal(toolInputData) - toolInput := json.RawMessage(toolInputBytes) - responseText := "Let me think about this." - outputTokens := uint64(len(responseText)/4 + len(toolInputBytes)/4) +// makeThinkingResponse creates a response with extended thinking content +func (s *PredictableService) makeThinkingResponse(thoughts string, inputTokens uint64) *llm.Response { + responseText := "I've considered my approach." 
+ outputTokens := uint64(len(responseText)/4 + len(thoughts)/4) if outputTokens == 0 { outputTokens = 1 } return &llm.Response{ - ID: fmt.Sprintf("pred-think-%d", time.Now().UnixNano()), + ID: fmt.Sprintf("pred-thinking-%d", time.Now().UnixNano()), Type: "message", Role: llm.MessageRoleAssistant, Model: "predictable-v1", Content: []llm.Content{ + {Type: llm.ContentTypeThinking, Thinking: thoughts}, {Type: llm.ContentTypeText, Text: responseText}, - { - ID: fmt.Sprintf("tool_%d", time.Now().UnixNano()%1000), - Type: llm.ContentTypeToolUse, - ToolName: "think", - ToolInput: toolInput, - }, }, - StopReason: llm.StopReasonToolUse, + StopReason: llm.StopReasonEndTurn, Usage: llm.Usage{ InputTokens: inputTokens, OutputTokens: outputTokens, @@ -629,13 +620,10 @@ func (s *PredictableService) makeToolSmorgasbordResponse(inputTokens uint64) *ll ToolInput: json.RawMessage(bashInput), }) - // think tool - thinkInput, _ := json.Marshal(map[string]string{"thoughts": "I'm thinking about the best approach for this task. Let me consider all the options available."}) + // extended thinking content (not a tool) content = append(content, llm.Content{ - ID: fmt.Sprintf("tool_think_%d", (baseNano+1)%1000), - Type: llm.ContentTypeToolUse, - ToolName: "think", - ToolInput: json.RawMessage(thinkInput), + Type: llm.ContentTypeThinking, + Thinking: "I'm thinking about the best approach for this task. 
Let me consider all the options available.", }) // patch tool diff --git a/models/models.go b/models/models.go index a82ca5be70be43d604c8ff8410339922935d3723..9c866eb0d156b9428fa9903e18ae83827daa8c07 100644 --- a/models/models.go +++ b/models/models.go @@ -166,7 +166,7 @@ func All() []Model { if config.AnthropicAPIKey == "" { return nil, fmt.Errorf("claude-opus-4.5 requires ANTHROPIC_API_KEY") } - svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Opus, HTTPC: httpc} + svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Opus, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium} if url := config.getAnthropicURL(); url != "" { svc.URL = url } @@ -183,7 +183,7 @@ func All() []Model { if config.AnthropicAPIKey == "" { return nil, fmt.Errorf("claude-sonnet-4.5 requires ANTHROPIC_API_KEY") } - svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Sonnet, HTTPC: httpc} + svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Sonnet, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium} if url := config.getAnthropicURL(); url != "" { svc.URL = url } @@ -200,7 +200,7 @@ func All() []Model { if config.AnthropicAPIKey == "" { return nil, fmt.Errorf("claude-haiku-4.5 requires ANTHROPIC_API_KEY") } - svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Haiku, HTTPC: httpc} + svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Haiku, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium} if url := config.getAnthropicURL(); url != "" { svc.URL = url } @@ -234,7 +234,7 @@ func All() []Model { if config.OpenAIAPIKey == "" { return nil, fmt.Errorf("gpt-5.2-codex requires OPENAI_API_KEY") } - svc := &oai.ResponsesService{Model: oai.GPT52Codex, APIKey: config.OpenAIAPIKey, HTTPC: httpc} + svc := &oai.ResponsesService{Model: oai.GPT52Codex, APIKey: config.OpenAIAPIKey, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium} if url := config.getOpenAIURL(); url != "" { svc.ModelURL 
= url } @@ -682,10 +682,11 @@ func (m *Manager) createServiceFromModel(model *generated.Model) llm.Service { switch model.ProviderType { case "anthropic": return &ant.Service{ - APIKey: model.ApiKey, - URL: model.Endpoint, - Model: model.ModelName, - HTTPC: m.httpc, + APIKey: model.ApiKey, + URL: model.Endpoint, + Model: model.ModelName, + HTTPC: m.httpc, + ThinkingLevel: llm.ThinkingLevelMedium, } case "openai": return &oai.Service{ @@ -706,8 +707,9 @@ func (m *Manager) createServiceFromModel(model *generated.Model) llm.Service { ModelName: model.ModelName, URL: model.Endpoint, }, - MaxTokens: int(model.MaxTokens), - HTTPC: m.httpc, + MaxTokens: int(model.MaxTokens), + HTTPC: m.httpc, + ThinkingLevel: llm.ThinkingLevelMedium, } case "gemini": return &gem.Service{ diff --git a/server/custom_models.go b/server/custom_models.go index 619e5a6104730ab865714ac8ca150ddcbe5b9f52..076bbf4da4b79f613cf668bfdbe66583432e12fd 100644 --- a/server/custom_models.go +++ b/server/custom_models.go @@ -349,9 +349,10 @@ func (s *Server) handleTestModel(w http.ResponseWriter, r *http.Request) { switch req.ProviderType { case "anthropic": service = &ant.Service{ - APIKey: req.APIKey, - URL: req.Endpoint, - Model: req.ModelName, + APIKey: req.APIKey, + URL: req.Endpoint, + Model: req.ModelName, + ThinkingLevel: llm.ThinkingLevelMedium, } case "openai": service = &oai.Service{ diff --git a/test/server_test.go b/test/server_test.go index b9cf3ad635d1abe660648e8be45758ccf200fe2c..8edd19c694d9943cb9b0dc71aa7734049807bd20 100644 --- a/test/server_test.go +++ b/test/server_test.go @@ -333,10 +333,10 @@ func TestPredictableServiceWithTools(t *testing.T) { t.Fatal("Expected greeting to mention Shelley") } - // Second call should return tool use + // Second call should return tool use (bash command) resp2, err := service.Do(context.Background(), &llm.Request{ Messages: []llm.Message{ - {Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Create an example"}}}, + 
{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}}}, }, }) if err != nil { @@ -364,8 +364,8 @@ func TestPredictableServiceWithTools(t *testing.T) { t.Fatal("Expected tool use content") } - if toolUse.ToolName != "think" { - t.Fatalf("Expected think tool, got %s", toolUse.ToolName) + if toolUse.ToolName != "bash" { + t.Fatalf("Expected bash tool, got %s", toolUse.ToolName) } } diff --git a/ui/src/components/ChatInterface.tsx b/ui/src/components/ChatInterface.tsx index d162a19bd8ce410dce2cd1db36f3bbe53766a72f..900d3dabc8856307629056c185eb6747e280bd12 100644 --- a/ui/src/components/ChatInterface.tsx +++ b/ui/src/components/ChatInterface.tsx @@ -15,7 +15,7 @@ import DiffViewer from "./DiffViewer"; import BashTool from "./BashTool"; import PatchTool from "./PatchTool"; import ScreenshotTool from "./ScreenshotTool"; -import ThinkTool from "./ThinkTool"; + import KeywordSearchTool from "./KeywordSearchTool"; import BrowserNavigateTool from "./BrowserNavigateTool"; import BrowserEvalTool from "./BrowserEvalTool"; @@ -227,7 +227,7 @@ const TOOL_COMPONENTS: Record> = { patch: PatchTool, screenshot: ScreenshotTool, browser_take_screenshot: ScreenshotTool, - think: ThinkTool, + keyword_search: KeywordSearchTool, browser_navigate: BrowserNavigateTool, browser_eval: BrowserEvalTool, @@ -1192,6 +1192,9 @@ function ChatInterface({ coalescedItems.push({ type: "message", message }); } + // Check if this message was truncated (tool calls lost) + const wasTruncated = llmData.ExcludedFromContext === true; + // Add tool uses as separate items toolUses.forEach((toolUse) => { const resultData = toolUse.ID ? 
toolResultMap[toolUse.ID] : undefined; @@ -1203,10 +1206,12 @@ function ChatInterface({ toolName: toolUse.ToolName, toolInput: toolUse.ToolInput, toolResult: resultData?.result, - toolError: resultData?.error, + // Mark as error if truncated and no result + toolError: resultData?.error || (wasTruncated && !resultData), toolStartTime: resultData?.startTime, toolEndTime: resultData?.endTime, - hasResult: !!resultData || completedViaDisplay, + // Mark as complete if truncated (tool was lost, not running) + hasResult: !!resultData || completedViaDisplay || wasTruncated, display: displayData, }); }); diff --git a/ui/src/components/Message.tsx b/ui/src/components/Message.tsx index da5ce603dd24c4e899d6a82b078f001114b8bbe5..08c359852b5d50db8192b5fe5a1955ac0ac7de45 100644 --- a/ui/src/components/Message.tsx +++ b/ui/src/components/Message.tsx @@ -5,7 +5,7 @@ import BashTool from "./BashTool"; import PatchTool from "./PatchTool"; import ScreenshotTool from "./ScreenshotTool"; import GenericTool from "./GenericTool"; -import ThinkTool from "./ThinkTool"; + import KeywordSearchTool from "./KeywordSearchTool"; import BrowserNavigateTool from "./BrowserNavigateTool"; import BrowserEvalTool from "./BrowserEvalTool"; @@ -15,6 +15,7 @@ import ChangeDirTool from "./ChangeDirTool"; import BrowserResizeTool from "./BrowserResizeTool"; import SubagentTool from "./SubagentTool"; import OutputIframeTool from "./OutputIframeTool"; +import ThinkingContent from "./ThinkingContent"; import UsageDetailModal from "./UsageDetailModal"; import MessageActionBar from "./MessageActionBar"; @@ -283,7 +284,7 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp } }; - // Get text content from message for copying (includes tool results) + // Get text content from message for copying (includes tool results and thinking) const getMessageText = (): string => { if (!llmMessage?.Content) return ""; @@ -292,6 +293,12 @@ function Message({ message, onOpenDiffViewer, 
onCommentTextChange }: MessageProp const contentType = getContentType(content.Type); if (contentType === "text" && content.Text) { textParts.push(content.Text); + } else if (contentType === "thinking") { + // Include thinking content + const thinkingText = content.Thinking || content.Text; + if (thinkingText) { + textParts.push(`[Thinking]\n${thinkingText}`); + } } else if (contentType === "tool_result" && content.ToolResult) { // Extract text from tool result content content.ToolResult.forEach((result) => { @@ -459,10 +466,7 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp if (content.ToolName === "screenshot" || content.ToolName === "browser_take_screenshot") { return ; } - // Use specialized component for think tool - if (content.ToolName === "think") { - return ; - } + // Use specialized component for change_dir tool if (content.ToolName === "change_dir") { return ; @@ -598,20 +602,6 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp toolResult={content.ToolResult} hasError={hasError} executionTime={executionTime} - display={content.Display} - /> - ); - } - - // Use specialized component for think tool - if (toolName === "think") { - return ( - ); } @@ -753,9 +743,11 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp } case "redacted_thinking": return
[Thinking content hidden]
; - case "thinking": - // Hide thinking content by default in main flow, but could be made expandable - return null; + case "thinking": { + const thinkingText = content.Thinking || content.Text || ""; + if (!thinkingText) return null; + return ; + } default: { // For unknown content types, show the type and try to display useful content const displayText = content.Text || content.Data || ""; @@ -989,18 +981,22 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp return null; } - // Filter out thinking content, empty content, tool_use, and tool_result + // Filter out redacted thinking, empty content, tool_use, and tool_result + // Keep thinking content (3) for display const meaningfulContent = llmMessage?.Content?.filter((c) => { const contentType = c.Type; - // Filter out thinking (3), redacted thinking (4), tool_use (5), tool_result (6), and empty text content + // Filter out redacted thinking (4), tool_use (5), tool_result (6), and empty text content + // Keep thinking (3) if it has content + if (contentType === 3) { + return !!(c.Thinking || c.Text); + } return ( - contentType !== 3 && contentType !== 4 && contentType !== 5 && contentType !== 6 && (c.Text?.trim() || contentType !== 2) - ); // 3 = thinking, 4 = redacted_thinking, 5 = tool_use, 6 = tool_result, 2 = text + ); // 4 = redacted_thinking, 5 = tool_use, 6 = tool_result, 2 = text }) || []; // Don't filter out messages that contain operation status like "[Operation cancelled]" diff --git a/ui/src/components/ThinkTool.tsx b/ui/src/components/ThinkTool.tsx deleted file mode 100644 index f23f4cd75b3e711969fd3a58903867e4606bc68a..0000000000000000000000000000000000000000 --- a/ui/src/components/ThinkTool.tsx +++ /dev/null @@ -1,94 +0,0 @@ -import React, { useState } from "react"; -import { LLMContent } from "../types"; - -interface ThinkToolProps { - // For tool_use (pending state) - toolInput?: unknown; // { thoughts: string } - isRunning?: boolean; - - // For tool_result 
(completed state) - toolResult?: LLMContent[]; - hasError?: boolean; - executionTime?: string; -} - -function ThinkTool({ toolInput, isRunning, toolResult, hasError, executionTime }: ThinkToolProps) { - const [isExpanded, setIsExpanded] = useState(false); - - // Extract thoughts from toolInput - const thoughts = - typeof toolInput === "object" && - toolInput !== null && - "thoughts" in toolInput && - typeof toolInput.thoughts === "string" - ? toolInput.thoughts - : typeof toolInput === "string" - ? toolInput - : ""; - - // Truncate thoughts for display - get first 50 chars - const truncateThoughts = (text: string, maxLen: number = 50) => { - if (!text) return ""; - if (text.length <= maxLen) return text; - return text.substring(0, maxLen) + "..."; - }; - - const displayThoughts = truncateThoughts(thoughts); - const isComplete = !isRunning && toolResult !== undefined; - - return ( -
-
setIsExpanded(!isExpanded)}> -
- 💭 - - {displayThoughts || (isRunning ? "thinking..." : "thinking...")} - - {isComplete && hasError && ✗} - {isComplete && !hasError && ✓} -
- -
- - {isExpanded && ( -
-
-
- Thoughts: - {executionTime && {executionTime}} -
-
- {thoughts || "(no thoughts)"} -
-
-
- )} -
- ); -} - -export default ThinkTool; diff --git a/ui/src/components/ThinkingContent.tsx b/ui/src/components/ThinkingContent.tsx new file mode 100644 index 0000000000000000000000000000000000000000..fc3006cb8389e918735eb3ac02562796f115943a --- /dev/null +++ b/ui/src/components/ThinkingContent.tsx @@ -0,0 +1,90 @@ +import React, { useState } from "react"; + +interface ThinkingContentProps { + thinking: string; +} + +function ThinkingContent({ thinking }: ThinkingContentProps) { + const [isExpanded, setIsExpanded] = useState(true); + + // Truncate thinking for display - get first 80 chars + const truncateThinking = (text: string, maxLen: number = 80) => { + if (!text) return ""; + const firstLine = text.split("\n")[0]; + if (firstLine.length <= maxLen) return firstLine; + return firstLine.substring(0, maxLen) + "..."; + }; + + const preview = truncateThinking(thinking); + + return ( +
+
setIsExpanded(!isExpanded)} + style={{ + cursor: "pointer", + display: "flex", + alignItems: "flex-start", + gap: "0.5rem", + marginLeft: 0, + }} + > + 💭 +
+ {isExpanded ? thinking : preview} +
+ +
+
+ ); +} + +export default ThinkingContent;