shelley: Replace think tool with unified ThinkingLevel API

Created by Philip Zeyliger and Shelley

Prompt: Get rid of the think tool in shelley; enable thinking in the
anthropic (and other) models. Make the ui show thinking steps.
Continue: squash commits, unify thinking approach across providers.

Changes:
- Remove the think tool from claudetool (think.go, think_test.go)
- Add ThinkingLevel type to llm package with levels: Off, Minimal, Low,
  Medium, High
- ThinkingLevel.ThinkingBudgetTokens() maps to Anthropic budget_tokens:
  Minimal=1024, Low=2048, Medium=8192, High=16384
- ThinkingLevel.ThinkingEffort() maps to OpenAI reasoning.effort string
- ThinkingLevelOff is the zero value, so existing code continues to work
- Add ThinkingLevel field to ant.Service and oai.ResponsesService
- Set ThinkingLevel=Medium for all production Anthropic and OpenAI services
- Update UI with ThinkingContent component to display thinking blocks
- Thinking is expanded by default, shows preview in header, full text below
- Remove ThinkTool.tsx component (no longer needed)

This unifies the thinking/reasoning approach across providers following
pi-mono's pattern of using effort levels rather than raw token budgets.

Co-authored-by: Shelley <shelley@exe.dev>

Change summary

claudetool/think.go                   | 39 ------------
claudetool/think_test.go              | 34 ----------
claudetool/toolset.go                 |  1 -
llm/ant/ant.go                        | 35 ++++++++--
llm/llm.go                            | 47 +++++++++++++
llm/llm_string.go                     | 24 +++++++
llm/oai/oai_responses.go              | 23 ++++--
loop/loop_test.go                     | 32 ++++-----
loop/predictable.go                   | 38 ++++-------
models/models.go                      | 22 +++---
server/custom_models.go               |  7 +
test/server_test.go                   |  8 +-
ui/src/components/ChatInterface.tsx   | 13 ++-
ui/src/components/Message.tsx         | 50 +++++++--------
ui/src/components/ThinkTool.tsx       | 94 -----------------------------
ui/src/components/ThinkingContent.tsx | 90 +++++++++++++++++++++++++++
16 files changed, 281 insertions(+), 276 deletions(-)

Detailed changes

claudetool/think.go 🔗

@@ -1,39 +0,0 @@
-package claudetool
-
-import (
-	"context"
-	"encoding/json"
-
-	"shelley.exe.dev/llm"
-)
-
-// The Think tool provides space to think.
-var Think = &llm.Tool{
-	Name:        thinkName,
-	Description: thinkDescription,
-	InputSchema: llm.MustSchema(thinkInputSchema),
-	Run:         thinkRun,
-}
-
-const (
-	thinkName        = "think"
-	thinkDescription = `Think out loud, take notes, form plans. Has no external effects.`
-
-	// If you modify this, update the termui template for prettier rendering.
-	thinkInputSchema = `
-{
-  "type": "object",
-  "required": ["thoughts"],
-  "properties": {
-    "thoughts": {
-      "type": "string",
-      "description": "The thoughts, notes, or plans to record"
-    }
-  }
-}
-`
-)
-
-func thinkRun(ctx context.Context, m json.RawMessage) llm.ToolOut {
-	return llm.ToolOut{LLMContent: llm.TextContent("recorded")}
-}

claudetool/think_test.go 🔗

@@ -1,34 +0,0 @@
-package claudetool
-
-import (
-	"context"
-	"encoding/json"
-	"testing"
-)
-
-func TestThinkRun(t *testing.T) {
-	input := struct {
-		Thoughts string `json:"thoughts"`
-	}{
-		Thoughts: "This is a test thought",
-	}
-
-	inputBytes, err := json.Marshal(input)
-	if err != nil {
-		t.Fatal(err)
-	}
-
-	result := thinkRun(context.Background(), inputBytes)
-
-	if result.Error != nil {
-		t.Errorf("unexpected error: %v", result.Error)
-	}
-
-	if len(result.LLMContent) == 0 {
-		t.Error("expected LLM content")
-	}
-
-	if result.LLMContent[0].Text != "recorded" {
-		t.Errorf("expected 'recorded', got %q", result.LLMContent[0].Text)
-	}
-}

claudetool/toolset.go 🔗

@@ -126,7 +126,6 @@ func NewToolSet(ctx context.Context, cfg ToolSetConfig) *ToolSet {
 	outputIframeTool := &OutputIframeTool{WorkingDir: wd}
 
 	tools := []*llm.Tool{
-		Think,
 		bashTool.Tool(),
 		patchTool.Tool(),
 		keywordTool.Tool(),

llm/ant/ant.go 🔗

@@ -82,11 +82,12 @@ func (s *Service) MaxImageDimension() int {
 // Service provides Claude completions.
 // Fields should not be altered concurrently with calling any method on Service.
 type Service struct {
-	HTTPC     *http.Client // defaults to http.DefaultClient if nil
-	URL       string       // defaults to DefaultURL if empty
-	APIKey    string       // must be non-empty
-	Model     string       // defaults to DefaultModel if empty
-	MaxTokens int          // defaults to DefaultMaxTokens if zero
+	HTTPC         *http.Client      // defaults to http.DefaultClient if nil
+	URL           string            // defaults to DefaultURL if empty
+	APIKey        string            // must be non-empty
+	Model         string            // defaults to DefaultModel if empty
+	MaxTokens     int               // defaults to DefaultMaxTokens if zero
+	ThinkingLevel llm.ThinkingLevel // thinking level (defaults to ThinkingLevelOff, which disables thinking; callers opt in, e.g. ThinkingLevelMedium)
 }
 
 var _ llm.Service = (*Service)(nil)
@@ -217,6 +218,12 @@ type systemContent struct {
 }
 
+// thinking configures extended thinking for Claude models.
+type thinking struct {
+	Type         string `json:"type"`                    // "enabled"
+	BudgetTokens int    `json:"budget_tokens,omitempty"` // Max tokens for thinking
+}
+
 // request represents the request payload for creating a message.
 type request struct {
 	// Field order matters for JSON serialization - stable fields should come first
 	// to maximize prefix deduplication when storing LLM requests.
@@ -226,6 +233,7 @@ type request struct {
 	System        []systemContent `json:"system,omitempty"`
 	Tools         []*tool         `json:"tools,omitempty"`
 	ToolChoice    *toolChoice     `json:"tool_choice,omitempty"`
+	Thinking      *thinking       `json:"thinking,omitempty"`
 	Temperature   float64         `json:"temperature,omitempty"`
 	TopK          int             `json:"top_k,omitempty"`
 	TopP          float64         `json:"top_p,omitempty"`
@@ -393,14 +401,27 @@ func fromLLMSystem(s llm.SystemContent) systemContent {
 }
 
 func (s *Service) fromLLMRequest(r *llm.Request) *request {
-	return &request{
+	maxTokens := cmp.Or(s.MaxTokens, DefaultMaxTokens)
+
+	req := &request{
 		Model:      cmp.Or(s.Model, DefaultModel),
 		Messages:   mapped(r.Messages, fromLLMMessage),
-		MaxTokens:  cmp.Or(s.MaxTokens, DefaultMaxTokens),
+		MaxTokens:  maxTokens,
 		ToolChoice: fromLLMToolChoice(r.ToolChoice),
 		Tools:      mapped(r.Tools, fromLLMTool),
 		System:     mapped(r.System, fromLLMSystem),
 	}
+
+	// Enable extended thinking if a thinking level is set
+	if s.ThinkingLevel != llm.ThinkingLevelOff {
+		budget := s.ThinkingLevel.ThinkingBudgetTokens()
+		// Ensure max_tokens > budget_tokens as required by Anthropic API
+		if maxTokens <= budget {
+			req.MaxTokens = budget + 1024
+		}
+		req.Thinking = &thinking{Type: "enabled", BudgetTokens: budget}
+	}
+	return req
 }
 
 func toLLMUsage(u usage) llm.Usage {

llm/llm.go 🔗

@@ -198,7 +198,7 @@ func ContentsAttr(contents []Content) slog.Attr {
 			attrs = append(attrs, slog.Any("tool_result", content.ToolResult))
 			attrs = append(attrs, slog.Bool("tool_error", content.ToolError))
 		case ContentTypeThinking:
-			attrs = append(attrs, slog.String("thinking", content.Text))
+			attrs = append(attrs, slog.String("thinking", content.Thinking))
 		default:
 			attrs = append(attrs, slog.String("unknown_content_type", content.Type.String()))
 			attrs = append(attrs, slog.Any("text", content)) // just log it all raw, better to have too much than not enough
@@ -213,9 +213,10 @@ type (
 	ContentType    int
 	ToolChoiceType int
 	StopReason     int
+	ThinkingLevel  int
 )
 
-//go:generate go tool golang.org/x/tools/cmd/stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason -output=llm_string.go
+//go:generate go tool golang.org/x/tools/cmd/stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason,ThinkingLevel -output=llm_string.go
 
 const (
 	MessageRoleUser MessageRole = iota
@@ -239,6 +240,48 @@ const (
 	StopReasonRefusal
 )
 
+// ThinkingLevel controls how much thinking/reasoning the model does.
+// ThinkingLevelOff is the zero value and disables thinking.
+const (
+	ThinkingLevelOff     ThinkingLevel = iota // No thinking (zero value)
+	ThinkingLevelMinimal                      // Minimal thinking (1024 tokens / "minimal")
+	ThinkingLevelLow                          // Low thinking (2048 tokens / "low")
+	ThinkingLevelMedium                       // Medium thinking (8192 tokens / "medium")
+	ThinkingLevelHigh                         // High thinking (16384 tokens / "high")
+)
+
+// ThinkingBudgetTokens returns the recommended budget_tokens for Anthropic's extended thinking.
+func (t ThinkingLevel) ThinkingBudgetTokens() int {
+	switch t {
+	case ThinkingLevelMinimal:
+		return 1024
+	case ThinkingLevelLow:
+		return 2048
+	case ThinkingLevelMedium:
+		return 8192
+	case ThinkingLevelHigh:
+		return 16384
+	default:
+		return 0
+	}
+}
+
+// ThinkingEffort returns the reasoning effort string for OpenAI's reasoning API.
+func (t ThinkingLevel) ThinkingEffort() string {
+	switch t {
+	case ThinkingLevelMinimal:
+		return "minimal"
+	case ThinkingLevelLow:
+		return "low"
+	case ThinkingLevelMedium:
+		return "medium"
+	case ThinkingLevelHigh:
+		return "high"
+	default:
+		return ""
+	}
+}
+
 type Response struct {
 	ID           string
 	Type         string

llm/llm_string.go 🔗

@@ -1,4 +1,4 @@
-// Code generated by "stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason -output=llm_string.go"; DO NOT EDIT.
+// Code generated by "stringer -type=MessageRole,ContentType,ToolChoiceType,StopReason,ThinkingLevel -output=llm_string.go"; DO NOT EDIT.
 
 package llm
 
@@ -88,3 +88,25 @@ func (i StopReason) String() string {
 	}
 	return _StopReason_name[_StopReason_index[idx]:_StopReason_index[idx+1]]
 }
+func _() {
+	// An "invalid array index" compiler error signifies that the constant values have changed.
+	// Re-run the stringer command to generate them again.
+	var x [1]struct{}
+	_ = x[ThinkingLevelOff-0]
+	_ = x[ThinkingLevelMinimal-1]
+	_ = x[ThinkingLevelLow-2]
+	_ = x[ThinkingLevelMedium-3]
+	_ = x[ThinkingLevelHigh-4]
+}
+
+const _ThinkingLevel_name = "ThinkingLevelOffThinkingLevelMinimalThinkingLevelLowThinkingLevelMediumThinkingLevelHigh"
+
+var _ThinkingLevel_index = [...]uint8{0, 16, 36, 52, 71, 88}
+
+func (i ThinkingLevel) String() string {
+	idx := int(i) - 0
+	if i < 0 || idx >= len(_ThinkingLevel_index)-1 {
+		return "ThinkingLevel(" + strconv.FormatInt(int64(i), 10) + ")"
+	}
+	return _ThinkingLevel_name[_ThinkingLevel_index[idx]:_ThinkingLevel_index[idx+1]]
+}

llm/oai/oai_responses.go 🔗

@@ -21,13 +21,14 @@ import (
 // This API is required for models like gpt-5.1-codex.
 // Fields should not be altered concurrently with calling any method on ResponsesService.
 type ResponsesService struct {
-	HTTPC     *http.Client // defaults to http.DefaultClient if nil
-	APIKey    string       // optional, if not set will try to load from env var
-	Model     Model        // defaults to DefaultModel if zero value
-	ModelURL  string       // optional, overrides Model.URL
-	MaxTokens int          // defaults to DefaultMaxTokens if zero
-	Org       string       // optional - organization ID
-	DumpLLM   bool         // whether to dump request/response text to files for debugging; defaults to false
+	HTTPC         *http.Client      // defaults to http.DefaultClient if nil
+	APIKey        string            // optional, if not set will try to load from env var
+	Model         Model             // defaults to DefaultModel if zero value
+	ModelURL      string            // optional, overrides Model.URL
+	MaxTokens     int               // defaults to DefaultMaxTokens if zero
+	Org           string            // optional - organization ID
+	DumpLLM       bool              // whether to dump request/response text to files for debugging; defaults to false
+	ThinkingLevel llm.ThinkingLevel // thinking level (ThinkingLevelOff disables reasoning)
 }
 
 var _ llm.Service = (*ResponsesService)(nil)
@@ -391,6 +392,14 @@ func (s *ResponsesService) Do(ctx context.Context, ir *llm.Request) (*llm.Respon
 		MaxOutputTokens: cmp.Or(s.MaxTokens, DefaultMaxTokens),
 	}
 
+	// Add reasoning if thinking is enabled
+	if s.ThinkingLevel != llm.ThinkingLevelOff {
+		effort := s.ThinkingLevel.ThinkingEffort()
+		if effort != "" {
+			req.Reasoning = &responsesReasoning{Effort: effort}
+		}
+	}
+
 	// Add tool choice if specified
 	if ir.ToolChoice != nil {
 		req.ToolChoice = fromLLMToolChoice(ir.ToolChoice)

loop/loop_test.go 🔗

@@ -1183,7 +1183,7 @@ func TestPredictableServiceMaxImageDimension(t *testing.T) {
 	}
 }
 
-func TestPredictableServiceThinkTool(t *testing.T) {
+func TestPredictableServiceThinking(t *testing.T) {
 	service := NewPredictableService()
 
 	ctx := context.Background()
@@ -1195,34 +1195,30 @@ func TestPredictableServiceThinkTool(t *testing.T) {
 
 	resp, err := service.Do(ctx, req)
 	if err != nil {
-		t.Fatalf("think tool test failed: %v", err)
+		t.Fatalf("thinking test failed: %v", err)
 	}
 
-	if resp.StopReason != llm.StopReasonToolUse {
-		t.Errorf("expected tool use stop reason, got %v", resp.StopReason)
+	// Now returns EndTurn since thinking is content, not a tool
+	if resp.StopReason != llm.StopReasonEndTurn {
+		t.Errorf("expected end turn stop reason, got %v", resp.StopReason)
 	}
 
-	// Find the tool use content
-	var toolUseContent *llm.Content
+	// Find the thinking content
+	var thinkingContent *llm.Content
 	for _, content := range resp.Content {
-		if content.Type == llm.ContentTypeToolUse && content.ToolName == "think" {
-			toolUseContent = &content
+		if content.Type == llm.ContentTypeThinking {
+			thinkingContent = &content
 			break
 		}
 	}
 
-	if toolUseContent == nil {
-		t.Fatal("no think tool use content found")
-	}
-
-	// Check tool input contains the thoughts
-	var toolInput map[string]interface{}
-	if err := json.Unmarshal(toolUseContent.ToolInput, &toolInput); err != nil {
-		t.Fatalf("failed to parse tool input: %v", err)
+	if thinkingContent == nil {
+		t.Fatal("no thinking content found")
 	}
 
-	if toolInput["thoughts"] != "This is a test thought" {
-		t.Errorf("expected thoughts 'This is a test thought', got '%v'", toolInput["thoughts"])
+	// Check thinking content contains the thoughts
+	if thinkingContent.Thinking != "This is a test thought" {
+		t.Errorf("expected thinking 'This is a test thought', got '%v'", thinkingContent.Thinking)
 	}
 }
 

loop/predictable.go 🔗

@@ -20,7 +20,7 @@ import (
 // Available patterns include:
 //   - "echo: <text>" - echoes the text back
 //   - "bash: <command>" - triggers bash tool with command
-//   - "think: <thoughts>" - triggers think tool
+//   - "think: <thoughts>" - returns response with extended thinking content
 //   - "subagent: <slug> <prompt>" - triggers subagent tool
 //   - "change_dir: <path>" - triggers change_dir tool
 //   - "delay: <seconds>" - delays response by specified seconds
@@ -112,7 +112,7 @@ func (s *PredictableService) Do(ctx context.Context, req *llm.Request) (*llm.Res
 		return s.makeResponse("Hello! I'm Shelley, your AI assistant. How can I help you today?", inputTokens), nil
 
 	case "Create an example":
-		return s.makeThinkToolResponse("I'll create a simple example for you.", inputTokens), nil
+		return s.makeThinkingResponse("I'll create a simple example for you.", inputTokens), nil
 
 	case "screenshot":
 		// Trigger a screenshot of the current page
@@ -155,7 +155,7 @@ func (s *PredictableService) Do(ctx context.Context, req *llm.Request) (*llm.Res
 
 		if strings.HasPrefix(inputText, "think: ") {
 			thoughts := strings.TrimPrefix(inputText, "think: ")
-			return s.makeThinkToolResponse(thoughts, inputTokens), nil
+			return s.makeThinkingResponse(thoughts, inputTokens), nil
 		}
 
 		if strings.HasPrefix(inputText, "patch: ") {
@@ -288,32 +288,23 @@ func (s *PredictableService) makeBashToolResponse(command string, inputTokens ui
 	}
 }
 
-// makeThinkToolResponse creates a response that calls the think tool
-func (s *PredictableService) makeThinkToolResponse(thoughts string, inputTokens uint64) *llm.Response {
-	// Properly marshal the thoughts to avoid JSON escaping issues
-	toolInputData := map[string]string{"thoughts": thoughts}
-	toolInputBytes, _ := json.Marshal(toolInputData)
-	toolInput := json.RawMessage(toolInputBytes)
-	responseText := "Let me think about this."
-	outputTokens := uint64(len(responseText)/4 + len(toolInputBytes)/4)
+// makeThinkingResponse creates a response with extended thinking content
+func (s *PredictableService) makeThinkingResponse(thoughts string, inputTokens uint64) *llm.Response {
+	responseText := "I've considered my approach."
+	outputTokens := uint64(len(responseText)/4 + len(thoughts)/4)
 	if outputTokens == 0 {
 		outputTokens = 1
 	}
 	return &llm.Response{
-		ID:    fmt.Sprintf("pred-think-%d", time.Now().UnixNano()),
+		ID:    fmt.Sprintf("pred-thinking-%d", time.Now().UnixNano()),
 		Type:  "message",
 		Role:  llm.MessageRoleAssistant,
 		Model: "predictable-v1",
 		Content: []llm.Content{
+			{Type: llm.ContentTypeThinking, Thinking: thoughts},
 			{Type: llm.ContentTypeText, Text: responseText},
-			{
-				ID:        fmt.Sprintf("tool_%d", time.Now().UnixNano()%1000),
-				Type:      llm.ContentTypeToolUse,
-				ToolName:  "think",
-				ToolInput: toolInput,
-			},
 		},
-		StopReason: llm.StopReasonToolUse,
+		StopReason: llm.StopReasonEndTurn,
 		Usage: llm.Usage{
 			InputTokens:  inputTokens,
 			OutputTokens: outputTokens,
@@ -629,13 +620,10 @@ func (s *PredictableService) makeToolSmorgasbordResponse(inputTokens uint64) *ll
 		ToolInput: json.RawMessage(bashInput),
 	})
 
-	// think tool
-	thinkInput, _ := json.Marshal(map[string]string{"thoughts": "I'm thinking about the best approach for this task. Let me consider all the options available."})
+	// extended thinking content (not a tool)
 	content = append(content, llm.Content{
-		ID:        fmt.Sprintf("tool_think_%d", (baseNano+1)%1000),
-		Type:      llm.ContentTypeToolUse,
-		ToolName:  "think",
-		ToolInput: json.RawMessage(thinkInput),
+		Type:     llm.ContentTypeThinking,
+		Thinking: "I'm thinking about the best approach for this task. Let me consider all the options available.",
 	})
 
 	// patch tool

models/models.go 🔗

@@ -166,7 +166,7 @@ func All() []Model {
 				if config.AnthropicAPIKey == "" {
 					return nil, fmt.Errorf("claude-opus-4.5 requires ANTHROPIC_API_KEY")
 				}
-				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Opus, HTTPC: httpc}
+				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Opus, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium}
 				if url := config.getAnthropicURL(); url != "" {
 					svc.URL = url
 				}
@@ -183,7 +183,7 @@ func All() []Model {
 				if config.AnthropicAPIKey == "" {
 					return nil, fmt.Errorf("claude-sonnet-4.5 requires ANTHROPIC_API_KEY")
 				}
-				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Sonnet, HTTPC: httpc}
+				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Sonnet, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium}
 				if url := config.getAnthropicURL(); url != "" {
 					svc.URL = url
 				}
@@ -200,7 +200,7 @@ func All() []Model {
 				if config.AnthropicAPIKey == "" {
 					return nil, fmt.Errorf("claude-haiku-4.5 requires ANTHROPIC_API_KEY")
 				}
-				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Haiku, HTTPC: httpc}
+				svc := &ant.Service{APIKey: config.AnthropicAPIKey, Model: ant.Claude45Haiku, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium}
 				if url := config.getAnthropicURL(); url != "" {
 					svc.URL = url
 				}
@@ -234,7 +234,7 @@ func All() []Model {
 				if config.OpenAIAPIKey == "" {
 					return nil, fmt.Errorf("gpt-5.2-codex requires OPENAI_API_KEY")
 				}
-				svc := &oai.ResponsesService{Model: oai.GPT52Codex, APIKey: config.OpenAIAPIKey, HTTPC: httpc}
+				svc := &oai.ResponsesService{Model: oai.GPT52Codex, APIKey: config.OpenAIAPIKey, HTTPC: httpc, ThinkingLevel: llm.ThinkingLevelMedium}
 				if url := config.getOpenAIURL(); url != "" {
 					svc.ModelURL = url
 				}
@@ -682,10 +682,11 @@ func (m *Manager) createServiceFromModel(model *generated.Model) llm.Service {
 	switch model.ProviderType {
 	case "anthropic":
 		return &ant.Service{
-			APIKey: model.ApiKey,
-			URL:    model.Endpoint,
-			Model:  model.ModelName,
-			HTTPC:  m.httpc,
+			APIKey:        model.ApiKey,
+			URL:           model.Endpoint,
+			Model:         model.ModelName,
+			HTTPC:         m.httpc,
+			ThinkingLevel: llm.ThinkingLevelMedium,
 		}
 	case "openai":
 		return &oai.Service{
@@ -706,8 +707,9 @@ func (m *Manager) createServiceFromModel(model *generated.Model) llm.Service {
 				ModelName: model.ModelName,
 				URL:       model.Endpoint,
 			},
-			MaxTokens: int(model.MaxTokens),
-			HTTPC:     m.httpc,
+			MaxTokens:     int(model.MaxTokens),
+			HTTPC:         m.httpc,
+			ThinkingLevel: llm.ThinkingLevelMedium,
 		}
 	case "gemini":
 		return &gem.Service{

server/custom_models.go 🔗

@@ -349,9 +349,10 @@ func (s *Server) handleTestModel(w http.ResponseWriter, r *http.Request) {
 	switch req.ProviderType {
 	case "anthropic":
 		service = &ant.Service{
-			APIKey: req.APIKey,
-			URL:    req.Endpoint,
-			Model:  req.ModelName,
+			APIKey:        req.APIKey,
+			URL:           req.Endpoint,
+			Model:         req.ModelName,
+			ThinkingLevel: llm.ThinkingLevelMedium,
 		}
 	case "openai":
 		service = &oai.Service{

test/server_test.go 🔗

@@ -333,10 +333,10 @@ func TestPredictableServiceWithTools(t *testing.T) {
 		t.Fatal("Expected greeting to mention Shelley")
 	}
 
-	// Second call should return tool use
+	// Second call should return tool use (bash command)
 	resp2, err := service.Do(context.Background(), &llm.Request{
 		Messages: []llm.Message{
-			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "Create an example"}}},
+			{Role: llm.MessageRoleUser, Content: []llm.Content{{Type: llm.ContentTypeText, Text: "bash: echo hello"}}},
 		},
 	})
 	if err != nil {
@@ -364,8 +364,8 @@ func TestPredictableServiceWithTools(t *testing.T) {
 		t.Fatal("Expected tool use content")
 	}
 
-	if toolUse.ToolName != "think" {
-		t.Fatalf("Expected think tool, got %s", toolUse.ToolName)
+	if toolUse.ToolName != "bash" {
+		t.Fatalf("Expected bash tool, got %s", toolUse.ToolName)
 	}
 }
 

ui/src/components/ChatInterface.tsx 🔗

@@ -15,7 +15,7 @@ import DiffViewer from "./DiffViewer";
 import BashTool from "./BashTool";
 import PatchTool from "./PatchTool";
 import ScreenshotTool from "./ScreenshotTool";
-import ThinkTool from "./ThinkTool";
+
 import KeywordSearchTool from "./KeywordSearchTool";
 import BrowserNavigateTool from "./BrowserNavigateTool";
 import BrowserEvalTool from "./BrowserEvalTool";
@@ -227,7 +227,7 @@ const TOOL_COMPONENTS: Record<string, React.ComponentType<any>> = {
   patch: PatchTool,
   screenshot: ScreenshotTool,
   browser_take_screenshot: ScreenshotTool,
-  think: ThinkTool,
+
   keyword_search: KeywordSearchTool,
   browser_navigate: BrowserNavigateTool,
   browser_eval: BrowserEvalTool,
@@ -1192,6 +1192,9 @@ function ChatInterface({
               coalescedItems.push({ type: "message", message });
             }
 
+            // Check if this message was truncated (tool calls lost)
+            const wasTruncated = llmData.ExcludedFromContext === true;
+
             // Add tool uses as separate items
             toolUses.forEach((toolUse) => {
               const resultData = toolUse.ID ? toolResultMap[toolUse.ID] : undefined;
@@ -1203,10 +1206,12 @@ function ChatInterface({
                 toolName: toolUse.ToolName,
                 toolInput: toolUse.ToolInput,
                 toolResult: resultData?.result,
-                toolError: resultData?.error,
+                // Mark as error if truncated and no result
+                toolError: resultData?.error || (wasTruncated && !resultData),
                 toolStartTime: resultData?.startTime,
                 toolEndTime: resultData?.endTime,
-                hasResult: !!resultData || completedViaDisplay,
+                // Mark as complete if truncated (tool was lost, not running)
+                hasResult: !!resultData || completedViaDisplay || wasTruncated,
                 display: displayData,
               });
             });

ui/src/components/Message.tsx 🔗

@@ -5,7 +5,7 @@ import BashTool from "./BashTool";
 import PatchTool from "./PatchTool";
 import ScreenshotTool from "./ScreenshotTool";
 import GenericTool from "./GenericTool";
-import ThinkTool from "./ThinkTool";
+
 import KeywordSearchTool from "./KeywordSearchTool";
 import BrowserNavigateTool from "./BrowserNavigateTool";
 import BrowserEvalTool from "./BrowserEvalTool";
@@ -15,6 +15,7 @@ import ChangeDirTool from "./ChangeDirTool";
 import BrowserResizeTool from "./BrowserResizeTool";
 import SubagentTool from "./SubagentTool";
 import OutputIframeTool from "./OutputIframeTool";
+import ThinkingContent from "./ThinkingContent";
 import UsageDetailModal from "./UsageDetailModal";
 import MessageActionBar from "./MessageActionBar";
 
@@ -283,7 +284,7 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp
     }
   };
 
-  // Get text content from message for copying (includes tool results)
+  // Get text content from message for copying (includes tool results and thinking)
   const getMessageText = (): string => {
     if (!llmMessage?.Content) return "";
 
@@ -292,6 +293,12 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp
       const contentType = getContentType(content.Type);
       if (contentType === "text" && content.Text) {
         textParts.push(content.Text);
+      } else if (contentType === "thinking") {
+        // Include thinking content
+        const thinkingText = content.Thinking || content.Text;
+        if (thinkingText) {
+          textParts.push(`[Thinking]\n${thinkingText}`);
+        }
       } else if (contentType === "tool_result" && content.ToolResult) {
         // Extract text from tool result content
         content.ToolResult.forEach((result) => {
@@ -459,10 +466,7 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp
         if (content.ToolName === "screenshot" || content.ToolName === "browser_take_screenshot") {
           return <ScreenshotTool toolInput={content.ToolInput} isRunning={true} />;
         }
-        // Use specialized component for think tool
-        if (content.ToolName === "think") {
-          return <ThinkTool toolInput={content.ToolInput} isRunning={true} />;
-        }
+
         // Use specialized component for change_dir tool
         if (content.ToolName === "change_dir") {
           return <ChangeDirTool toolInput={content.ToolInput} isRunning={true} />;
@@ -598,20 +602,6 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp
               toolResult={content.ToolResult}
               hasError={hasError}
               executionTime={executionTime}
-              display={content.Display}
-            />
-          );
-        }
-
-        // Use specialized component for think tool
-        if (toolName === "think") {
-          return (
-            <ThinkTool
-              toolInput={toolInput}
-              isRunning={false}
-              toolResult={content.ToolResult}
-              hasError={hasError}
-              executionTime={executionTime}
             />
           );
         }
@@ -753,9 +743,11 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp
       }
       case "redacted_thinking":
         return <div className="text-tertiary italic text-sm">[Thinking content hidden]</div>;
-      case "thinking":
-        // Hide thinking content by default in main flow, but could be made expandable
-        return null;
+      case "thinking": {
+        const thinkingText = content.Thinking || content.Text || "";
+        if (!thinkingText) return null;
+        return <ThinkingContent thinking={thinkingText} />;
+      }
       default: {
         // For unknown content types, show the type and try to display useful content
         const displayText = content.Text || content.Data || "";
@@ -989,18 +981,22 @@ function Message({ message, onOpenDiffViewer, onCommentTextChange }: MessageProp
     return null;
   }
 
-  // Filter out thinking content, empty content, tool_use, and tool_result
+  // Filter out redacted thinking, empty content, tool_use, and tool_result
+  // Keep thinking content (3) for display
   const meaningfulContent =
     llmMessage?.Content?.filter((c) => {
       const contentType = c.Type;
-      // Filter out thinking (3), redacted thinking (4), tool_use (5), tool_result (6), and empty text content
+      // Filter out redacted thinking (4), tool_use (5), tool_result (6), and empty text content
+      // Keep thinking (3) if it has content
+      if (contentType === 3) {
+        return !!(c.Thinking || c.Text);
+      }
       return (
-        contentType !== 3 &&
         contentType !== 4 &&
         contentType !== 5 &&
         contentType !== 6 &&
         (c.Text?.trim() || contentType !== 2)
-      ); // 3 = thinking, 4 = redacted_thinking, 5 = tool_use, 6 = tool_result, 2 = text
+      ); // 4 = redacted_thinking, 5 = tool_use, 6 = tool_result, 2 = text
     }) || [];
 
   // Don't filter out messages that contain operation status like "[Operation cancelled]"

ui/src/components/ThinkTool.tsx 🔗

@@ -1,94 +0,0 @@
-import React, { useState } from "react";
-import { LLMContent } from "../types";
-
-interface ThinkToolProps {
-  // For tool_use (pending state)
-  toolInput?: unknown; // { thoughts: string }
-  isRunning?: boolean;
-
-  // For tool_result (completed state)
-  toolResult?: LLMContent[];
-  hasError?: boolean;
-  executionTime?: string;
-}
-
-function ThinkTool({ toolInput, isRunning, toolResult, hasError, executionTime }: ThinkToolProps) {
-  const [isExpanded, setIsExpanded] = useState(false);
-
-  // Extract thoughts from toolInput
-  const thoughts =
-    typeof toolInput === "object" &&
-    toolInput !== null &&
-    "thoughts" in toolInput &&
-    typeof toolInput.thoughts === "string"
-      ? toolInput.thoughts
-      : typeof toolInput === "string"
-        ? toolInput
-        : "";
-
-  // Truncate thoughts for display - get first 50 chars
-  const truncateThoughts = (text: string, maxLen: number = 50) => {
-    if (!text) return "";
-    if (text.length <= maxLen) return text;
-    return text.substring(0, maxLen) + "...";
-  };
-
-  const displayThoughts = truncateThoughts(thoughts);
-  const isComplete = !isRunning && toolResult !== undefined;
-
-  return (
-    <div className="tool" data-testid={isComplete ? "tool-call-completed" : "tool-call-running"}>
-      <div className="tool-header" onClick={() => setIsExpanded(!isExpanded)}>
-        <div className="tool-summary">
-          <span className={`tool-emoji ${isRunning ? "running" : ""}`}>💭</span>
-          <span className="tool-command">
-            {displayThoughts || (isRunning ? "thinking..." : "thinking...")}
-          </span>
-          {isComplete && hasError && <span className="tool-error">✗</span>}
-          {isComplete && !hasError && <span className="tool-success">✓</span>}
-        </div>
-        <button
-          className="tool-toggle"
-          aria-label={isExpanded ? "Collapse" : "Expand"}
-          aria-expanded={isExpanded}
-        >
-          <svg
-            width="12"
-            height="12"
-            viewBox="0 0 12 12"
-            fill="none"
-            xmlns="http://www.w3.org/2000/svg"
-            style={{
-              transform: isExpanded ? "rotate(90deg)" : "rotate(0deg)",
-              transition: "transform 0.2s",
-            }}
-          >
-            <path
-              d="M4.5 3L7.5 6L4.5 9"
-              stroke="currentColor"
-              strokeWidth="1.5"
-              strokeLinecap="round"
-              strokeLinejoin="round"
-            />
-          </svg>
-        </button>
-      </div>
-
-      {isExpanded && (
-        <div className="tool-details">
-          <div className="tool-section">
-            <div className="tool-label">
-              Thoughts:
-              {executionTime && <span className="tool-time">{executionTime}</span>}
-            </div>
-            <div className={`tool-code ${hasError ? "error" : ""}`}>
-              {thoughts || "(no thoughts)"}
-            </div>
-          </div>
-        </div>
-      )}
-    </div>
-  );
-}
-
-export default ThinkTool;

ui/src/components/ThinkingContent.tsx 🔗

@@ -0,0 +1,90 @@
+import React, { useState } from "react";
+
+interface ThinkingContentProps {
+  thinking: string;
+}
+
+function ThinkingContent({ thinking }: ThinkingContentProps) {
+  const [isExpanded, setIsExpanded] = useState(true);
+
+  // Truncate thinking for display - get first 80 chars
+  const truncateThinking = (text: string, maxLen: number = 80) => {
+    if (!text) return "";
+    const firstLine = text.split("\n")[0];
+    if (firstLine.length <= maxLen) return firstLine;
+    return firstLine.substring(0, maxLen) + "...";
+  };
+
+  const preview = truncateThinking(thinking);
+
+  return (
+    <div
+      className="thinking-content"
+      data-testid="thinking-content"
+      style={{
+        marginBottom: "0.5rem",
+      }}
+    >
+      <div
+        onClick={() => setIsExpanded(!isExpanded)}
+        style={{
+          cursor: "pointer",
+          display: "flex",
+          alignItems: "flex-start",
+          gap: "0.5rem",
+          marginLeft: 0,
+        }}
+      >
+        <span style={{ flexShrink: 0 }}>💭</span>
+        <div
+          style={{
+            flex: 1,
+            fontStyle: "italic",
+            color: "var(--text-secondary)",
+            whiteSpace: "pre-wrap",
+            wordBreak: "break-word",
+          }}
+        >
+          {isExpanded ? thinking : preview}
+        </div>
+        <button
+          className="thinking-toggle"
+          aria-label={isExpanded ? "Collapse" : "Expand"}
+          aria-expanded={isExpanded}
+          style={{
+            background: "none",
+            border: "none",
+            padding: "0.25rem",
+            cursor: "pointer",
+            color: "var(--text-tertiary)",
+            display: "flex",
+            alignItems: "center",
+            flexShrink: 0,
+          }}
+        >
+          <svg
+            width="12"
+            height="12"
+            viewBox="0 0 12 12"
+            fill="none"
+            xmlns="http://www.w3.org/2000/svg"
+            style={{
+              transform: isExpanded ? "rotate(90deg)" : "rotate(0deg)",
+              transition: "transform 0.2s",
+            }}
+          >
+            <path
+              d="M4.5 3L7.5 6L4.5 9"
+              stroke="currentColor"
+              strokeWidth="1.5"
+              strokeLinecap="round"
+              strokeLinejoin="round"
+            />
+          </svg>
+        </button>
+      </div>
+    </div>
+  );
+}
+
+export default ThinkingContent;