Merge pull request #2908 from charmbracelet/fast-ui

Christian Rocha created 2 months ago

perf(ui): tui performance improvements

Change summary

internal/agent/agent.go                            |  14 
internal/app/app.go                                |  16 
internal/backend/session.go                        |   6 
internal/message/message.go                        | 341 ++++++
internal/message/message_test.go                   | 695 +++++++++++++
internal/pubsub/broker.go                          | 148 ++
internal/ui/anim/anim.go                           |  54 
internal/ui/chat/agent.go                          |  58 +
internal/ui/chat/applyhighlight_callback_test.go   | 207 +++
internal/ui/chat/assistant.go                      | 475 ++++++++
internal/ui/chat/assistant_section_cache_test.go   | 464 ++++++++
internal/ui/chat/assistant_test.go                 | 112 +
internal/ui/chat/assistant_thinking_window_test.go | 500 +++++++++
internal/ui/chat/incremental_glamour_test.go       | 853 ++++++++++++++++
internal/ui/chat/messages.go                       | 102 +
internal/ui/chat/prefix_cache_test.go              | 196 +++
internal/ui/chat/streaming_markdown.go             | 740 +++++++++++++
internal/ui/chat/tools.go                          |  75 +
internal/ui/chat/user.go                           |  37 
internal/ui/chat/version_bump_test.go              | 565 ++++++++++
internal/ui/common/markdown.go                     |  74 +
internal/ui/completions/item.go                    |  36 
internal/ui/completions/version_bump_test.go       |  85 +
internal/ui/dialog/commands_item.go                |  35 
internal/ui/dialog/models_item.go                  |  31 
internal/ui/dialog/reasoning.go                    |  22 
internal/ui/dialog/sessions_item.go                |  38 
internal/ui/dialog/version_bump_test.go            | 219 ++++
internal/ui/list/item.go                           |  67 +
internal/ui/list/list.go                           | 274 ++++
internal/ui/list/list_test.go                      | 770 ++++++++++++++
internal/ui/model/chat.go                          | 113 ++
internal/ui/model/chat_draw_cache_test.go          | 334 ++++++
internal/ui/model/chat_expand_test.go              |   8 
internal/ui/model/layout_test.go                   |   2 
internal/workspace/app_workspace.go                |   6 
36 files changed, 7,592 insertions(+), 180 deletions(-)

Detailed changes

internal/agent/agent.go 🔗

@@ -245,6 +245,15 @@ func (a *sessionAgent) Run(ctx context.Context, call SessionAgentCall) (*fantasy
 
 	defer cancel()
 	defer a.activeRequests.Del(call.SessionID)
+	// Drain any debounced message updates before returning. message.Service
+	// already flushes synchronously on terminal updates, but a defer here
+	// guarantees the contract at every Run exit (success, error, panic
+	// recovery upstream) without callers needing to know.
+	defer func() {
+		if flushErr := a.messages.FlushAll(ctx); flushErr != nil {
+			slog.Error("Failed to flush pending message updates after run", "error", flushErr)
+		}
+	}()
 
 	history, files := a.preparePrompt(msgs, largeModel.CatwalkCfg.SupportsImages, call.Attachments...)
 
@@ -653,6 +662,11 @@ func (a *sessionAgent) Summarize(ctx context.Context, sessionID string, opts fan
 	a.activeRequests.Set(sessionID, cancel)
 	defer a.activeRequests.Del(sessionID)
 	defer cancel()
+	defer func() {
+		if flushErr := a.messages.FlushAll(ctx); flushErr != nil {
+			slog.Error("Failed to flush pending message updates after summarize", "error", flushErr)
+		}
+	}()
 
 	agent := fantasy.NewAgent(largeModel.Model,
 		fantasy.WithSystemPrompt(string(summaryPrompt)),

internal/app/app.go 🔗

@@ -584,13 +584,23 @@ func (app *App) Shutdown() {
 		app.AgentCoordinator.CancelAll()
 	}
 
-	// Now run remaining cleanup tasks in parallel.
-	var wg sync.WaitGroup
-
 	// Shared shutdown context for all timeout-bounded cleanup.
 	shutdownCtx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
 	defer cancel()
 
+	// Drain any debounced message updates before the DB-close cleanup
+	// runs in the parallel block below. message.Service buffers
+	// streaming deltas (see internal/message/message.go) and we must
+	// land them while the connection is still open.
+	if app.Messages != nil {
+		if err := app.Messages.FlushAll(shutdownCtx); err != nil {
+			slog.Error("Failed to flush pending message updates on shutdown", "error", err)
+		}
+	}
+
+	// Now run remaining cleanup tasks in parallel.
+	var wg sync.WaitGroup
+
 	// Send exit event
 	wg.Go(func() {
 		event.AppExited()

internal/backend/session.go 🔗

@@ -72,6 +72,12 @@ func (b *Backend) ListSessionMessages(ctx context.Context, workspaceID, sessionI
 		return nil, err
 	}
 
+	// Drain debounced updates so HTTP clients (and the TUI on session
+	// switch) observe the latest in-memory state rather than racing the
+	// debounce timer in message.Service.
+	if err := ws.Messages.FlushAll(ctx); err != nil {
+		return nil, err
+	}
 	return ws.Messages.List(ctx, sessionID)
 }

internal/message/message.go 🔗

@@ -5,6 +5,7 @@ import (
 	"database/sql"
 	"encoding/json"
 	"fmt"
+	"sync"
 	"time"
 
 	"github.com/charmbracelet/crush/internal/db"
@@ -12,6 +13,13 @@ import (
 	"github.com/google/uuid"
 )
 
+// defaultUpdateDebounce is the default debounce window for [Service.Update].
+// Streaming deltas that arrive within the window are coalesced into a
+// single SQL write and a single pubsub event. Terminal updates
+// (finish/error/cancel/tool-call structural changes) bypass the
+// debounce and flush synchronously.
+const defaultUpdateDebounce = 33 * time.Millisecond
+
 type CreateMessageParams struct {
 	Role             MessageRole
 	Parts            []ContentPart
@@ -20,6 +28,21 @@ type CreateMessageParams struct {
 	IsSummaryMessage bool
 }
 
+// Service is the public interface to the message store.
+//
+// [Service.Update] is eventually consistent: it accepts new state into
+// an in-memory buffer and writes it to SQLite plus publishes a
+// [pubsub.UpdatedEvent] on the next debounce tick (default
+// [defaultUpdateDebounce]) or on the next terminal-state update,
+// whichever comes first. Terminal-state updates — those that finish
+// the message, add or finish a tool call, or end a reasoning section —
+// flush synchronously before [Service.Update] returns.
+//
+// Callers that need stronger ordering (e.g. tests, shutdown,
+// session-switch reads) must use [Service.Flush] or [Service.FlushAll]
+// before reading via [Service.Get] / [Service.List]. Without an
+// explicit flush, a read can race the debounce timer and miss the
+// most recent in-memory state.
 type Service interface {
 	pubsub.Subscriber[Message]
 	Create(ctx context.Context, sessionID string, params CreateMessageParams) (Message, error)
@@ -30,20 +53,87 @@ type Service interface {
 	ListAllUserMessages(ctx context.Context) ([]Message, error)
 	Delete(ctx context.Context, id string) error
 	DeleteSessionMessages(ctx context.Context, sessionID string) error
+
+	// Flush synchronously drains any pending debounced state for the
+	// given message ID, performs the SQL write, and publishes the
+	// resulting [pubsub.UpdatedEvent]. Idempotent; cheap no-op if no
+	// updates are pending. Use this before any read that must observe
+	// the latest [Service.Update].
+	Flush(ctx context.Context, id string) error
+
+	// FlushAll synchronously drains pending debounced state for every
+	// message known to the service. Intended for shutdown and
+	// session-switch paths.
+	FlushAll(ctx context.Context) error
+}
+
+// pendingState holds the in-memory coalescing buffer for a single
+// message ID. All fields except where noted are guarded by
+// service.mu. The flushing flag serializes concurrent flushers for
+// the same ID so SQL writes never reorder.
+type pendingState struct {
+	// latest is the most recent [Message] passed to [Service.Update]
+	// that has not yet been flushed.
+	latest Message
+
+	// dirty is true when latest contains state that has not been
+	// written to SQL since the last successful flush.
+	dirty bool
+
+	// flushing is true while a goroutine is performing the SQL write
+	// for this ID. New updates are still accepted (and re-mark dirty)
+	// but other flushers must back off.
+	flushing bool
+
+	// timer is the active debounce timer, or nil if no flush is
+	// scheduled. Stopped and reset when a terminal update preempts
+	// the debounce window.
+	timer *time.Timer
+
+	// lastFlushed is the snapshot most recently written to SQL. Used
+	// as the baseline for terminal-state detection.
+	lastFlushed Message
+
+	// hasFlushed is false until the first successful write for this
+	// ID; until then lastFlushed is the zero value and must not be
+	// treated as a real prior state.
+	hasFlushed bool
 }
 
 type service struct {
 	*pubsub.Broker[Message]
-	q db.Querier
+	q        db.Querier
+	debounce time.Duration
+
+	mu      sync.Mutex
+	pending map[string]*pendingState
 }
 
-func NewService(q db.Querier) Service {
-	return &service{
-		Broker: pubsub.NewBroker[Message](),
-		q:      q,
+// ServiceOption configures a [Service] at construction.
+type ServiceOption func(*service)
+
+// WithDebounce overrides the debounce window for [Service.Update]. A
+// zero or negative value disables debouncing entirely (every update
+// flushes synchronously). Intended primarily for tests.
+func WithDebounce(d time.Duration) ServiceOption {
+	return func(s *service) {
+		s.debounce = d
 	}
 }
 
+func NewService(q db.Querier, opts ...ServiceOption) Service {
+	s := &service{
+		Broker:   pubsub.NewBroker[Message](),
+		q:        q,
+		debounce: defaultUpdateDebounce,
+		pending:  make(map[string]*pendingState),
+	}
+	for _, opt := range opts {
+		opt(s)
+	}
+	return s
+}
+
 func (s *service) Delete(ctx context.Context, id string) error {
 	message, err := s.Get(ctx, id)
 	if err != nil {
@@ -53,6 +143,16 @@ func (s *service) Delete(ctx context.Context, id string) error {
 	if err != nil {
 		return err
 	}
+	// Drop any pending coalesced state for this ID. We never want to
+	// flush back over a deleted row.
+	s.mu.Lock()
+	if p, ok := s.pending[id]; ok {
+		if p.timer != nil {
+			p.timer.Stop()
+		}
+		delete(s.pending, id)
+	}
+	s.mu.Unlock()
 	// Clone the message before publishing to avoid race conditions with
 	// concurrent modifications to the Parts slice.
 	s.Publish(pubsub.DeletedEvent, message.Clone())
@@ -111,31 +211,240 @@ func (s *service) DeleteSessionMessages(ctx context.Context, sessionID string) e
 	return nil
 }
 
-func (s *service) Update(ctx context.Context, message Message) error {
-	parts, err := marshalParts(message.Parts)
+// Update accepts a new state for a message and either flushes
+// synchronously (terminal updates, debounce <= 0) or buffers it until
+// the next debounce tick. See [Service] for the contract.
+func (s *service) Update(ctx context.Context, msg Message) error {
+	cloned := msg.Clone()
+
+	// Zero or negative debounce: flush every update synchronously. This
+	// preserves the pre-coalescing behaviour for tests and any caller
+	// that explicitly opted out via [WithDebounce].
+	if s.debounce <= 0 {
+		s.mu.Lock()
+		p, ok := s.pending[msg.ID]
+		if !ok {
+			p = &pendingState{}
+			s.pending[msg.ID] = p
+		}
+		p.latest = cloned
+		p.dirty = true
+		s.mu.Unlock()
+		return s.flushOne(ctx, msg.ID, true)
+	}
+
+	s.mu.Lock()
+	p, ok := s.pending[msg.ID]
+	if !ok {
+		p = &pendingState{}
+		s.pending[msg.ID] = p
+	}
+	p.latest = cloned
+	p.dirty = true
+
+	var prev *Message
+	if p.hasFlushed {
+		prev = &p.lastFlushed
+	}
+	terminal := shouldFlushNow(prev, &cloned)
+
+	if terminal {
+		if p.timer != nil {
+			p.timer.Stop()
+			p.timer = nil
+		}
+		s.mu.Unlock()
+		return s.flushOne(ctx, msg.ID, true)
+	}
+
+	// Debounce: schedule a single flush per pending state. If a flush
+	// is already running we let it finish; the trailing dirty bit will
+	// be picked up by the next Update or by Flush.
+	if p.timer == nil && !p.flushing {
+		id := msg.ID
+		p.timer = time.AfterFunc(s.debounce, func() {
+			// Detached from caller ctx so a cancelled stream context
+			// does not strand the buffered write.
+			_ = s.flushOne(context.Background(), id, false)
+		})
+	}
+	s.mu.Unlock()
+	return nil
+}
+
+// Flush implements [Service.Flush].
+func (s *service) Flush(ctx context.Context, id string) error {
+	return s.flushOne(ctx, id, true)
+}
+
+// FlushAll implements [Service.FlushAll]. It snapshots every ID with
+// outstanding work — either dirty buffered state or a flush already in
+// flight — then drains each one. Picking up in-flight IDs ensures
+// FlushAll cannot return while a timer-fired write is still mid-SQL,
+// which is what shutdown and session-switch callers rely on.
+func (s *service) FlushAll(ctx context.Context) error {
+	s.mu.Lock()
+	ids := make([]string, 0, len(s.pending))
+	for id, p := range s.pending {
+		if p.dirty || p.flushing {
+			ids = append(ids, id)
+		}
+	}
+	s.mu.Unlock()
+	var firstErr error
+	for _, id := range ids {
+		if err := s.flushOne(ctx, id, true); err != nil && firstErr == nil {
+			firstErr = err
+		}
+	}
+	return firstErr
+}
+
+// flushOne drains a single message ID. When syncCaller is true the
+// caller is willing to wait through a concurrent in-flight flush so
+// that, on return, lastFlushed equals latest at the moment of return.
+// When false (timer-fired path) we bail if another flusher is already
+// running; that flusher will pick up the trailing dirty bit.
+//
+// Order matters: a sync caller must wait for any in-flight flush to
+// drain even when the buffer is currently clean — that in-flight
+// write has not yet updated the SQL row, so returning early would
+// violate the contract that on success lastFlushed reflects the most
+// recent state.
+func (s *service) flushOne(ctx context.Context, id string, syncCaller bool) error {
+	for {
+		s.mu.Lock()
+		p, ok := s.pending[id]
+		if !ok {
+			s.mu.Unlock()
+			return nil
+		}
+		if p.flushing {
+			if !syncCaller {
+				s.mu.Unlock()
+				return nil
+			}
+			s.mu.Unlock()
+			// Brief yield; in-flight write should land in <1ms typical.
+			time.Sleep(time.Millisecond)
+			continue
+		}
+		if !p.dirty {
+			s.mu.Unlock()
+			return nil
+		}
+
+		if p.timer != nil {
+			p.timer.Stop()
+			p.timer = nil
+		}
+		snap := p.latest
+		// Decide whether this snapshot represents a terminal event
+		// against the prior baseline. We must do this before resetting
+		// dirty/flushing because shouldFlushNow looks at p.lastFlushed
+		// (which is what was on disk before this write).
+		var prev *Message
+		if p.hasFlushed {
+			prev = &p.lastFlushed
+		}
+		isTerminal := shouldFlushNow(prev, &snap)
+		p.flushing = true
+		p.dirty = false
+		s.mu.Unlock()
+
+		err := s.write(ctx, snap)
+
+		s.mu.Lock()
+		p.flushing = false
+		if err == nil {
+			p.lastFlushed = snap
+			p.hasFlushed = true
+		} else {
+			// Restore dirty so the next caller retries.
+			p.dirty = true
+		}
+		// If a delta arrived during the SQL write and we are a sync
+		// caller, the user expects that delta to land too.
+		wasDirty := p.dirty
+		s.mu.Unlock()
+
+		if err != nil {
+			return err
+		}
+
+		// Terminal events — message finished, tool call added or
+		// finished, reasoning ended — use the bounded must-deliver
+		// path so they never get dropped under channel contention.
+		if isTerminal {
+			s.PublishMustDeliver(ctx, pubsub.UpdatedEvent, snap)
+		} else {
+			s.Publish(pubsub.UpdatedEvent, snap)
+		}
+
+		if wasDirty && syncCaller {
+			continue
+		}
+		return nil
+	}
+}
+
+// write performs the unguarded SQL write + UpdatedAt stamp. Caller
+// owns publishing.
+func (s *service) write(ctx context.Context, msg Message) error {
+	parts, err := marshalParts(msg.Parts)
 	if err != nil {
 		return err
 	}
 	finishedAt := sql.NullInt64{}
-	if f := message.FinishPart(); f != nil {
+	if f := msg.FinishPart(); f != nil {
 		finishedAt.Int64 = f.Time
 		finishedAt.Valid = true
 	}
-	err = s.q.UpdateMessage(ctx, db.UpdateMessageParams{
-		ID:         message.ID,
+	if err := s.q.UpdateMessage(ctx, db.UpdateMessageParams{
+		ID:         msg.ID,
 		Parts:      string(parts),
 		FinishedAt: finishedAt,
-	})
-	if err != nil {
+	}); err != nil {
 		return err
 	}
-	message.UpdatedAt = time.Now().Unix()
-	// Clone the message before publishing to avoid race conditions with
-	// concurrent modifications to the Parts slice.
-	s.Publish(pubsub.UpdatedEvent, message.Clone())
 	return nil
 }
 
+// shouldFlushNow returns true when next represents a structural
+// change that must not be silently coalesced: the message just
+// finished, the tool-call set grew, a tool call transitioned to
+// finished, or reasoning just finished. prev is the last-flushed
+// snapshot (or nil if no write has landed yet).
+func shouldFlushNow(prev, next *Message) bool {
+	if next.IsFinished() {
+		return true
+	}
+
+	var prevCalls []ToolCall
+	var prevReasoningFinishedAt int64
+	if prev != nil {
+		prevCalls = prev.ToolCalls()
+		prevReasoningFinishedAt = prev.ReasoningContent().FinishedAt
+	}
+	nextCalls := next.ToolCalls()
+	if len(nextCalls) != len(prevCalls) {
+		return true
+	}
+	for i := range nextCalls {
+		// Bounds-safe: lengths are equal here.
+		if nextCalls[i].Finished != prevCalls[i].Finished {
+			return true
+		}
+		// A tool call's input only matters once it has landed (Finished
+		// flips true). Earlier deltas to Input are debounced with the
+		// rest of the streaming state.
+	}
+	if next.ReasoningContent().FinishedAt > 0 && prevReasoningFinishedAt == 0 {
+		return true
+	}
+	return false
+}
+
 func (s *service) Get(ctx context.Context, id string) (Message, error) {
 	dbMessage, err := s.q.GetMessage(ctx, id)
 	if err != nil {

internal/message/message_test.go 🔗

@@ -0,0 +1,695 @@
+package message
+
+import (
+	"context"
+	"sync"
+	"sync/atomic"
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/db"
+	"github.com/charmbracelet/crush/internal/pubsub"
+	"github.com/charmbracelet/crush/internal/session"
+	"github.com/stretchr/testify/require"
+)
+
+// slowUpdateQuerier wraps a [db.Querier] and forces UpdateMessage to
+// hang on a release channel. Used to simulate an in-flight SQL write.
+type slowUpdateQuerier struct {
+	db.Querier
+	release   chan struct{}
+	started   chan struct{}
+	startOnce sync.Once
+}
+
+func (s *slowUpdateQuerier) UpdateMessage(ctx context.Context, arg db.UpdateMessageParams) error {
+	s.startOnce.Do(func() { close(s.started) })
+	select {
+	case <-s.release:
+	case <-ctx.Done():
+		return ctx.Err()
+	}
+	return s.Querier.UpdateMessage(ctx, arg)
+}
+
+// newTestService spins up a fresh in-memory message.Service backed by a
+// temporary on-disk SQLite database. Returns the service plus a session
+// ID to attach messages to.
+func newTestService(t *testing.T, opts ...ServiceOption) (Service, string) {
+	t.Helper()
+	conn, err := db.Connect(t.Context(), t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = conn.Close() })
+
+	q := db.New(conn)
+	sessions := session.NewService(q, conn)
+	sess, err := sessions.Create(t.Context(), "test")
+	require.NoError(t, err)
+
+	svc := NewService(q, opts...)
+	return svc, sess.ID
+}
+
+// eventCollector consumes broker events into a slice in a goroutine
+// and exposes thread-safe Snapshot / Reset helpers for assertions.
+type eventCollector struct {
+	mu     sync.Mutex
+	events []pubsub.Event[Message]
+}
+
+func collect(ctx context.Context, sub <-chan pubsub.Event[Message]) *eventCollector {
+	c := &eventCollector{}
+	go func() {
+		for {
+			select {
+			case <-ctx.Done():
+				return
+			case ev, ok := <-sub:
+				if !ok {
+					return
+				}
+				c.mu.Lock()
+				c.events = append(c.events, ev)
+				c.mu.Unlock()
+			}
+		}
+	}()
+	return c
+}
+
+func (c *eventCollector) snapshot() []pubsub.Event[Message] {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	out := make([]pubsub.Event[Message], len(c.events))
+	copy(out, c.events)
+	return out
+}
+
+func (c *eventCollector) reset() {
+	c.mu.Lock()
+	defer c.mu.Unlock()
+	c.events = nil
+}
+
+func TestUpdate_DebouncesTextDeltas(t *testing.T) {
+	t.Parallel()
+
+	// Long-enough debounce that we can verify nothing flushes prematurely.
+	svc, sessionID := newTestService(t, WithDebounce(50*time.Millisecond))
+
+	subCtx, cancelSub := context.WithCancel(t.Context())
+	defer cancelSub()
+	sub := svc.Subscribe(subCtx)
+	collector := collect(subCtx, sub)
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{
+		Role: Assistant,
+	})
+	require.NoError(t, err)
+	// Drop the CreatedEvent emitted by Create.
+	time.Sleep(5 * time.Millisecond)
+	collector.reset()
+
+	// Push 5 deltas inside a single debounce window.
+	for i := 0; i < 5; i++ {
+		msg.AppendContent("a")
+		require.NoError(t, svc.Update(t.Context(), msg))
+	}
+
+	// Before the debounce expires no UpdatedEvent should have landed.
+	time.Sleep(10 * time.Millisecond)
+	require.Empty(t, collector.snapshot(), "no events should land before debounce window expires")
+
+	// Wait for the debounce timer to fire.
+	require.Eventually(t, func() bool {
+		return len(collector.snapshot()) >= 1
+	}, time.Second, 5*time.Millisecond)
+	events := collector.snapshot()
+	require.Len(t, events, 1, "5 deltas should coalesce into 1 UpdatedEvent")
+	require.Equal(t, pubsub.UpdatedEvent, events[0].Type)
+	require.Equal(t, "aaaaa", events[0].Payload.Content().Text)
+
+	// Final state must be persisted.
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "aaaaa", got.Content().Text)
+}
+
+func TestUpdate_TerminalUpdatesFlushSynchronously(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	subCtx, cancelSub := context.WithCancel(t.Context())
+	defer cancelSub()
+	sub := svc.Subscribe(subCtx)
+	collector := collect(subCtx, sub)
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	time.Sleep(5 * time.Millisecond)
+	collector.reset()
+
+	// AddFinish makes the message terminal; Update must flush
+	// synchronously even with a 1-hour debounce.
+	msg.AppendContent("done")
+	msg.AddFinish(FinishReasonEndTurn, "", "")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	require.Eventually(t, func() bool {
+		return len(collector.snapshot()) >= 1
+	}, time.Second, 5*time.Millisecond,
+		"terminal update must publish without waiting for debounce")
+	events := collector.snapshot()
+	require.Len(t, events, 1)
+	require.True(t, events[0].Payload.IsFinished())
+
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.True(t, got.IsFinished())
+}
+
+func TestUpdate_ToolCallStructuralChangeFlushes(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+
+	// Adding a new tool call is a structural change → sync flush.
+	msg.AddToolCall(ToolCall{ID: "tc1", Name: "view", Finished: false})
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Len(t, got.ToolCalls(), 1)
+	require.Equal(t, "tc1", got.ToolCalls()[0].ID)
+
+	// Marking the tool call finished is also a structural change.
+	msg.AddToolCall(ToolCall{ID: "tc1", Name: "view", Input: "{}", Finished: true})
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	got, err = svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.True(t, got.ToolCalls()[0].Finished)
+}
+
+func TestUpdate_ReasoningEndFlushes(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+
+	// Reasoning deltas alone debounce.
+	msg.AppendReasoningContent("hmm")
+	require.NoError(t, svc.Update(t.Context(), msg))
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Empty(t, got.ReasoningContent().Thinking, "reasoning delta should still be in the debounce buffer")
+
+	// FinishThinking sets FinishedAt → terminal flush.
+	msg.FinishThinking()
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	got, err = svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "hmm", got.ReasoningContent().Thinking)
+	require.NotZero(t, got.ReasoningContent().FinishedAt)
+}
+
+func TestFlush_DrainsPendingDebouncedUpdates(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("buffered")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	// Without a flush the SQL row is unchanged from Create.
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Empty(t, got.Content().Text)
+
+	require.NoError(t, svc.Flush(t.Context(), msg.ID))
+
+	got, err = svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "buffered", got.Content().Text)
+
+	// Subsequent Flush is a no-op.
+	require.NoError(t, svc.Flush(t.Context(), msg.ID))
+}
+
+func TestFlushAll_DrainsAllPending(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	const n = 5
+	msgs := make([]Message, n)
+	for i := range msgs {
+		m, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+		require.NoError(t, err)
+		m.AppendContent("hi")
+		require.NoError(t, svc.Update(t.Context(), m))
+		msgs[i] = m
+	}
+
+	require.NoError(t, svc.FlushAll(t.Context()))
+
+	for _, m := range msgs {
+		got, err := svc.Get(t.Context(), m.ID)
+		require.NoError(t, err)
+		require.Equal(t, "hi", got.Content().Text, "FlushAll should drain every pending message")
+	}
+}
+
+func TestUpdate_OrderingMatchesNonCoalesced(t *testing.T) {
+	t.Parallel()
+
+	// Compare the final state after coalesced vs zero-debounce updates.
+	// A sequence of interleaved text/reasoning/tool-call updates must
+	// converge to the same final DB row either way.
+	build := func(svc Service, sessionID string) Message {
+		msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+		require.NoError(t, err)
+		msg.AppendReasoningContent("thinking 1 ")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AppendReasoningContent("thinking 2")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.FinishThinking()
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AppendContent("hello ")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AppendContent("world")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AddToolCall(ToolCall{ID: "tc", Name: "x", Finished: false})
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AddToolCall(ToolCall{ID: "tc", Name: "x", Input: "{}", Finished: true})
+		require.NoError(t, svc.Update(t.Context(), msg))
+		msg.AddFinish(FinishReasonEndTurn, "", "")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		return msg
+	}
+
+	coalesced, sid1 := newTestService(t, WithDebounce(20*time.Millisecond))
+	a := build(coalesced, sid1)
+	require.NoError(t, coalesced.FlushAll(t.Context()))
+	gotA, err := coalesced.Get(t.Context(), a.ID)
+	require.NoError(t, err)
+
+	immediate, sid2 := newTestService(t, WithDebounce(0))
+	b := build(immediate, sid2)
+	gotB, err := immediate.Get(t.Context(), b.ID)
+	require.NoError(t, err)
+
+	require.Equal(t, gotA.Content().Text, gotB.Content().Text)
+	require.Equal(t, gotA.ReasoningContent().Thinking, gotB.ReasoningContent().Thinking)
+	require.Equal(t, len(gotA.ToolCalls()), len(gotB.ToolCalls()))
+	require.Equal(t, gotA.IsFinished(), gotB.IsFinished())
+}
+
+func TestDelete_DropsPendingState(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("dropped")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	require.NoError(t, svc.Delete(t.Context(), msg.ID))
+
+	// FlushAll after Delete must not write to the deleted row.
+	require.NoError(t, svc.FlushAll(t.Context()))
+
+	_, err = svc.Get(t.Context(), msg.ID)
+	require.Error(t, err, "deleted message must remain deleted")
+}
+
+func TestBroker_PublishLossyDropCounter(t *testing.T) {
+	t.Parallel()
+
+	// Tiny channel buffer so we can saturate from a single sender.
+	b := pubsub.NewBrokerWithOptions[int](1)
+	defer b.Shutdown()
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := b.Subscribe(subCtx)
+	require.NotNil(t, sub)
+
+	// Don't read from sub. Saturate the buffer.
+	for range 100 {
+		b.Publish(pubsub.UpdatedEvent, 1)
+	}
+
+	require.GreaterOrEqual(t, b.DropCount(), uint64(1),
+		"lossy Publish must increment the drop counter under contention")
+}
+
+func TestBroker_PublishMustDeliverHonorsTimeout(t *testing.T) {
+	t.Parallel()
+
+	b := pubsub.NewBrokerWithOptions[int](1)
+	b.SetMustDeliverTimeout(20 * time.Millisecond)
+	defer b.Shutdown()
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := b.Subscribe(subCtx)
+	require.NotNil(t, sub)
+
+	// Saturate: one event sits in the buffer, the second must wait.
+	b.Publish(pubsub.UpdatedEvent, 1)
+
+	// PublishMustDeliver should block up to 20ms then drop.
+	start := time.Now()
+	b.PublishMustDeliver(t.Context(), pubsub.UpdatedEvent, 2)
+	elapsed := time.Since(start)
+
+	require.GreaterOrEqual(t, elapsed, 20*time.Millisecond,
+		"PublishMustDeliver should block at least the timeout under contention")
+	require.Less(t, elapsed, 200*time.Millisecond,
+		"PublishMustDeliver must not block indefinitely")
+	require.GreaterOrEqual(t, b.MustDeliverDropCount(), uint64(1),
+		"timeout must increment the must-deliver drop counter")
+}
+
+func TestBroker_PublishMustDeliverWithReader(t *testing.T) {
+	t.Parallel()
+
+	b := pubsub.NewBrokerWithOptions[int](1)
+	b.SetMustDeliverTimeout(50 * time.Millisecond)
+	defer b.Shutdown()
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := b.Subscribe(subCtx)
+
+	var received atomic.Uint64
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for {
+			select {
+			case <-subCtx.Done():
+				return
+			case _, ok := <-sub:
+				if !ok {
+					return
+				}
+				received.Add(1)
+			}
+		}
+	}()
+
+	for i := range 10 {
+		b.PublishMustDeliver(t.Context(), pubsub.UpdatedEvent, i)
+	}
+
+	// All 10 should land within the must-deliver timeout window.
+	require.Eventually(t, func() bool { return received.Load() == 10 },
+		time.Second, 5*time.Millisecond,
+		"all must-deliver events should reach an active subscriber")
+	require.Zero(t, b.MustDeliverDropCount(),
+		"no drops expected when subscriber drains promptly")
+}
+
+func TestUpdate_TerminalEventUsesMustDeliver(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(time.Hour))
+
+	subCtx, cancel := context.WithCancel(t.Context())
+	defer cancel()
+	sub := svc.Subscribe(subCtx)
+
+	var seenFinish atomic.Bool
+	done := make(chan struct{})
+	go func() {
+		defer close(done)
+		for {
+			select {
+			case <-subCtx.Done():
+				return
+			case ev, ok := <-sub:
+				if !ok {
+					return
+				}
+				if ev.Type == pubsub.UpdatedEvent && ev.Payload.IsFinished() {
+					seenFinish.Store(true)
+				}
+			}
+		}
+	}()
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("final")
+	msg.AddFinish(FinishReasonEndTurn, "", "")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	require.Eventually(t, func() bool { return seenFinish.Load() },
+		time.Second, 10*time.Millisecond,
+		"terminal update must reach subscribers via the must-deliver path")
+}
+
+func TestUpdate_ZeroDebounceFlushesEveryUpdate(t *testing.T) {
+	t.Parallel()
+
+	svc, sessionID := newTestService(t, WithDebounce(0))
+
+	msg, err := svc.Create(t.Context(), sessionID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+
+	for i := 0; i < 3; i++ {
+		msg.AppendContent("x")
+		require.NoError(t, svc.Update(t.Context(), msg))
+		got, err := svc.Get(t.Context(), msg.ID)
+		require.NoError(t, err)
+		require.Len(t, got.Content().Text, i+1, "every update must land synchronously when debounce is 0")
+	}
+}
+
+// TestFlush_WaitsForInFlightWrite reproduces the failure where Flush
+// or FlushAll could return before a concurrent in-flight SQL write
+// completed. We block UpdateMessage on a release channel, fire the
+// debounce timer, then call Flush and assert it does not return until
+// the in-flight write actually lands.
+func TestFlush_WaitsForInFlightWrite(t *testing.T) {
+	t.Parallel()
+
+	conn, err := db.Connect(t.Context(), t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = conn.Close() })
+
+	q := db.New(conn)
+	sessions := session.NewService(q, conn)
+	sess, err := sessions.Create(t.Context(), "test")
+	require.NoError(t, err)
+
+	slow := &slowUpdateQuerier{
+		Querier: q,
+		release: make(chan struct{}),
+		started: make(chan struct{}),
+	}
+	// Short debounce so the timer fires quickly.
+	svc := NewService(slow, WithDebounce(10*time.Millisecond))
+
+	msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("payload")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	// Wait for the timer-fired flush to enter UpdateMessage.
+	select {
+	case <-slow.started:
+	case <-time.After(time.Second):
+		t.Fatal("timer-fired flush never reached UpdateMessage")
+	}
+
+	// At this point the buffer is dirty=false but flushing=true. A
+	// naive Flush would early-return on !dirty. Spawn Flush in a
+	// goroutine and assert it has not returned while the write is
+	// still blocked.
+	flushDone := make(chan error, 1)
+	go func() { flushDone <- svc.Flush(t.Context(), msg.ID) }()
+
+	select {
+	case err := <-flushDone:
+		t.Fatalf("Flush returned %v while in-flight write was still blocked", err)
+	case <-time.After(50 * time.Millisecond):
+		// Expected: Flush is correctly waiting.
+	}
+
+	// Release the slow write; Flush must now return cleanly.
+	close(slow.release)
+	select {
+	case err := <-flushDone:
+		require.NoError(t, err)
+	case <-time.After(time.Second):
+		t.Fatal("Flush did not return after in-flight write completed")
+	}
+
+	// The SQL row should now reflect the buffered payload.
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "payload", got.Content().Text)
+}
+
+// TestFlushAll_WaitsForInFlightWrite asserts FlushAll picks up IDs
+// whose buffer is currently flushing (dirty=false) so shutdown and
+// session-switch callers don't return while an SQL write is mid-flight.
+func TestFlushAll_WaitsForInFlightWrite(t *testing.T) {
+	t.Parallel()
+
+	conn, err := db.Connect(t.Context(), t.TempDir())
+	require.NoError(t, err)
+	t.Cleanup(func() { _ = conn.Close() })
+
+	q := db.New(conn)
+	sessions := session.NewService(q, conn)
+	sess, err := sessions.Create(t.Context(), "test")
+	require.NoError(t, err)
+
+	slow := &slowUpdateQuerier{
+		Querier: q,
+		release: make(chan struct{}),
+		started: make(chan struct{}),
+	}
+	svc := NewService(slow, WithDebounce(10*time.Millisecond))
+
+	msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant})
+	require.NoError(t, err)
+	msg.AppendContent("payload")
+	require.NoError(t, svc.Update(t.Context(), msg))
+
+	select {
+	case <-slow.started:
+	case <-time.After(time.Second):
+		t.Fatal("timer-fired flush never reached UpdateMessage")
+	}
+
+	flushDone := make(chan error, 1)
+	go func() { flushDone <- svc.FlushAll(t.Context()) }()
+
+	select {
+	case err := <-flushDone:
+		t.Fatalf("FlushAll returned %v while in-flight write was still blocked", err)
+	case <-time.After(50 * time.Millisecond):
+	}
+
+	close(slow.release)
+	select {
+	case err := <-flushDone:
+		require.NoError(t, err)
+	case <-time.After(time.Second):
+		t.Fatal("FlushAll did not return after in-flight write completed")
+	}
+
+	got, err := svc.Get(t.Context(), msg.ID)
+	require.NoError(t, err)
+	require.Equal(t, "payload", got.Content().Text)
+}
+
+// TestUpdate_StructuralFlushUsesMustDeliver covers the second review
+// finding: structural terminal events (tool-call add, tool-call
+// finish, reasoning end) must publish via the must-deliver path even
+// when the message itself is not yet IsFinished.
+//
+// We detect which path was taken by saturating a subscriber's channel
+// buffer with no reader. With a short must-deliver timeout, the
+// must-deliver path increments [pubsub.Broker.MustDeliverDropCount]
+// after the timeout expires; the lossy path increments
+// [pubsub.Broker.DropCount] immediately. The two counters are
+// disjoint, so they precisely identify which call site published the
+// event.
+func TestUpdate_StructuralFlushUsesMustDeliver(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name string
+		mut  func(*Message)
+	}{
+		{
+			name: "tool call add",
+			mut: func(m *Message) {
+				m.AddToolCall(ToolCall{ID: "tc1", Name: "view"})
+			},
+		},
+		{
+			name: "tool call finish",
+			mut: func(m *Message) {
+				m.AddToolCall(ToolCall{ID: "tc1", Name: "view", Input: "{}", Finished: true})
+			},
+		},
+		{
+			name: "reasoning end",
+			mut: func(m *Message) {
+				m.AppendReasoningContent("hmm")
+				m.FinishThinking()
+			},
+		},
+	}
+
+	for _, tc := range cases {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			conn, err := db.Connect(t.Context(), t.TempDir())
+			require.NoError(t, err)
+			t.Cleanup(func() { _ = conn.Close() })
+
+			q := db.New(conn)
+			sessions := session.NewService(q, conn)
+			sess, err := sessions.Create(t.Context(), "test")
+			require.NoError(t, err)
+
+			// Replace the default broker with a tiny buffer + short
+			// must-deliver timeout so we can fully saturate from a
+			// single sender and observe drops without long waits.
+			svc := NewService(q, WithDebounce(time.Hour))
+			impl := svc.(*service)
+			impl.Shutdown()
+			impl.Broker = pubsub.NewBrokerWithOptions[Message](1)
+			impl.SetMustDeliverTimeout(40 * time.Millisecond)
+
+			subCtx, cancel := context.WithCancel(t.Context())
+			defer cancel()
+			sub := svc.Subscribe(subCtx)
+
+			msg, err := svc.Create(t.Context(), sess.ID, CreateMessageParams{Role: Assistant})
+			require.NoError(t, err)
+
+			// Saturate the subscriber's buffer (capacity 1). The
+			// CreatedEvent from Create above already left one event
+			// in the buffer; we never read sub, so the next publish
+			// has nowhere to go.
+			_ = sub // intentionally not drained.
+
+			// Drive the structural change. With debounce=1h, Update
+			// flushes synchronously and routes through whichever
+			// publish path the service chose for structural events.
+			tc.mut(&msg)
+			require.NoError(t, svc.Update(t.Context(), msg))
+
+			// Must-deliver timeout (40ms) should have expired with
+			// no drain. If structural events are routed through
+			// must-deliver: MustDeliverDropCount > 0, DropCount
+			// unchanged. If routed through lossy Publish:
+			// DropCount > 0, MustDeliverDropCount == 0.
+			require.Eventually(t, func() bool {
+				return impl.MustDeliverDropCount() >= 1
+			}, time.Second, 5*time.Millisecond,
+				"structural terminal event should publish via must-deliver, not lossy Publish")
+			require.Zero(t, impl.DropCount(),
+				"structural terminal event must not be silently dropped via lossy Publish")
+		})
+	}
+}

internal/pubsub/broker.go 🔗

@@ -1,24 +1,59 @@
+// Package pubsub provides a lightweight in-process broker for fan-out
+// event delivery between services and the UI.
+//
+// Delivery semantics:
+//
+//   - [Broker.Publish] is best-effort and lossy under contention. If a
+//     subscriber's channel is full, the event is dropped for that
+//     subscriber, a warning is logged, and a counter is incremented.
+//     This is the right choice for high-frequency intermediate updates
+//     (e.g. streaming token deltas) where only the latest state
+//     matters.
+//
+//   - [Broker.PublishMustDeliver] is bounded-blocking. For each
+//     subscriber it first tries a non-blocking send, then falls back to
+//     a per-subscriber blocking send with a hard timeout. On timeout the
+//     event is dropped for that subscriber, an error is logged, and the
+//     must-deliver drop counter is incremented. The publisher never
+//     blocks indefinitely. This is the right choice for terminal events
+//     (finish, tool result, error, cancel) that must not be silently
+//     coalesced away.
+//
+// Drop counters ([Broker.DropCount], [Broker.MustDeliverDropCount]) are
+// exposed so callers can surface saturation in telemetry.
 package pubsub
 
 import (
 	"context"
 	"log/slog"
 	"sync"
+	"sync/atomic"
+	"time"
 )
 
-// bufferSize is the per-subscriber channel capacity for any broker
-// created via NewBroker.  Publish is non-blocking, so a full buffer
-// drops events (with a warning log); sized to cover a long streaming
-// assistant turn (~one UpdatedEvent per token) even under TUI render
-// stalls.
-const bufferSize = 4096
+const (
+	// bufferSize is the per-subscriber channel capacity for any broker
+	// created via NewBroker. Publish is non-blocking, so a full buffer
+	// drops events (with a warning log); sized to cover a long
+	// streaming assistant turn (~one UpdatedEvent per token) even under
+	// TUI render stalls.
+	bufferSize = 4096
+
+	// defaultMustDeliverTimeout is the per-subscriber upper bound on how
+	// long [Broker.PublishMustDeliver] will block waiting for buffer
+	// space before giving up on that subscriber.
+	defaultMustDeliverTimeout = 50 * time.Millisecond
+)
 
 type Broker[T any] struct {
-	subs              map[chan Event[T]]struct{}
-	mu                sync.RWMutex
-	done              chan struct{}
-	subCount          int
-	channelBufferSize int
+	subs                 map[chan Event[T]]struct{}
+	mu                   sync.RWMutex
+	done                 chan struct{}
+	subCount             int
+	channelBufferSize    int
+	mustDeliverTimeout   time.Duration
+	dropCount            atomic.Uint64
+	mustDeliverDropCount atomic.Uint64
 }
 
 func NewBroker[T any]() *Broker[T] {
@@ -27,12 +62,26 @@ func NewBroker[T any]() *Broker[T] {
 
 func NewBrokerWithOptions[T any](channelBufferSize int) *Broker[T] {
 	return &Broker[T]{
-		subs:              make(map[chan Event[T]]struct{}),
-		done:              make(chan struct{}),
-		channelBufferSize: channelBufferSize,
+		subs:               make(map[chan Event[T]]struct{}),
+		done:               make(chan struct{}),
+		channelBufferSize:  channelBufferSize,
+		mustDeliverTimeout: defaultMustDeliverTimeout,
 	}
 }
 
+// SetMustDeliverTimeout overrides the per-subscriber timeout used by
+// [Broker.PublishMustDeliver]. A zero or negative value resets to the
+// default. Intended primarily for tests.
+func (b *Broker[T]) SetMustDeliverTimeout(d time.Duration) {
+	b.mu.Lock()
+	defer b.mu.Unlock()
+	if d <= 0 {
+		b.mustDeliverTimeout = defaultMustDeliverTimeout
+		return
+	}
+	b.mustDeliverTimeout = d
+}
+
 func (b *Broker[T]) Shutdown() {
 	select {
 	case <-b.done: // Already closed
@@ -94,6 +143,25 @@ func (b *Broker[T]) GetSubscriberCount() int {
 	return b.subCount
 }
 
+// DropCount returns the cumulative number of events dropped by
+// [Broker.Publish] because a subscriber's channel was full.
+func (b *Broker[T]) DropCount() uint64 {
+	return b.dropCount.Load()
+}
+
+// MustDeliverDropCount returns the cumulative number of events dropped
+// by [Broker.PublishMustDeliver] after the per-subscriber timeout
+// expired.
+func (b *Broker[T]) MustDeliverDropCount() uint64 {
+	return b.mustDeliverDropCount.Load()
+}
+
+// Publish delivers an event to every active subscriber.
+//
+// Delivery is non-blocking and lossy: if a subscriber's channel is full
+// the event is dropped for that subscriber, a warning is logged, and
+// [Broker.DropCount] is incremented. Use [Broker.PublishMustDeliver]
+// for events that must not be silently dropped.
 func (b *Broker[T]) Publish(t EventType, payload T) {
 	b.mu.RLock()
 	defer b.mu.RUnlock()
@@ -111,8 +179,58 @@ func (b *Broker[T]) Publish(t EventType, payload T) {
 		case sub <- event:
 		default:
 			// Channel is full, subscriber is slow — skip this event.
-			// This prevents blocking the publisher.
+			// Lossy by design; counted and logged so saturation is
+			// observable.
+			b.dropCount.Add(1)
 			slog.Warn("Pubsub buffer full; dropping event", "type", t)
 		}
 	}
 }
+
+// PublishMustDeliver delivers an event with bounded-blocking semantics.
+// For each subscriber it first attempts a non-blocking send, then falls
+// back to a blocking send bounded by a per-subscriber timeout (default
+// [defaultMustDeliverTimeout]). On timeout the event is dropped for
+// that subscriber, [Broker.MustDeliverDropCount] is incremented, and an
+// error is logged. The publisher never blocks indefinitely.
+//
+// Use this for terminal events that must reach subscribers (finish,
+// tool result, error, cancel). Callers must still tolerate rare drops
+// after timeout — recovery is the subscriber's responsibility (e.g. a
+// re-fetch on the next session-visible event).
+func (b *Broker[T]) PublishMustDeliver(ctx context.Context, t EventType, payload T) {
+	b.mu.RLock()
+	defer b.mu.RUnlock()
+
+	select {
+	case <-b.done:
+		return
+	default:
+	}
+
+	event := Event[T]{Type: t, Payload: payload}
+	timeout := b.mustDeliverTimeout
+
+	for sub := range b.subs {
+		// Fast path: non-blocking send.
+		select {
+		case sub <- event:
+			continue
+		default:
+		}
+
+		// Slow path: bounded blocking send.
+		timer := time.NewTimer(timeout)
+		select {
+		case sub <- event:
+			timer.Stop()
+		case <-timer.C:
+			b.mustDeliverDropCount.Add(1)
+			slog.Error("PublishMustDeliver timed out delivering event",
+				"type", t, "timeout", timeout)
+		case <-ctx.Done():
+			timer.Stop()
+			return
+		}
+	}
+}

internal/ui/anim/anim.go 🔗

@@ -30,9 +30,14 @@ const (
 	// change every 8 frames (400 milliseconds).
 	ellipsisAnimSpeed = 8
 
-	// The maximum amount of time that can pass before a character appears.
-	// This is used to create a staggered entrance effect.
-	maxBirthOffset = time.Second
+	// The maximum number of animation steps that can pass before a
+	// character appears. With fps == 20 this is ~1s of staggered
+	// entrance, identical to the previous wall-clock-driven value.
+	// Switching from wall-clock + rand to a step-driven birth schedule
+	// keeps Render() deterministic: two Anim instances built from the
+	// same Settings produce byte-identical output when no Animate ticks
+	// have advanced their step counter.
+	maxBirthSteps = 20
 
 	// Number of frames to prerender for the animation. After this number
 	// of frames, the animation will loop. This only applies when color
@@ -107,12 +112,12 @@ type Anim struct {
 	label            *csync.Slice[string]
 	labelWidth       int
 	labelColor       color.Color
-	startTime        time.Time
-	birthOffsets     []time.Duration
+	birthSteps       []int
 	initialFrames    [][]string // frames for the initial characters
 	initialized      atomic.Bool
 	cyclingFrames    [][]string           // frames for the cycling characters
-	step             atomic.Int64         // current main frame step
+	step             atomic.Int64         // current main frame step (wraps)
+	framesSinceStart atomic.Int64         // total Animate ticks (does not wrap)
 	ellipsisStep     atomic.Int64         // current ellipsis frame step
 	ellipsisFrames   *csync.Slice[string] // ellipsis animation frames
 	id               string
@@ -140,7 +145,6 @@ func New(opts Settings) *Anim {
 	} else {
 		a.id = fmt.Sprintf("%d", nextID())
 	}
-	a.startTime = time.Now()
 	a.cyclingCharWidth = opts.Size
 	a.labelColor = opts.LabelColor
 
@@ -207,7 +211,13 @@ func New(opts Settings) *Anim {
 			}
 		}
 
-		// Prerender scrambled rune frames for the animation.
+		// Prerender scrambled rune frames for the animation. Seed
+		// the rune picker off the settings hash so cyclingFrames is
+		// a pure function of Settings: two processes with identical
+		// Settings populate the cache with the same glyphs, which
+		// keeps any cross-process golden-file comparison stable.
+		seed := xxh3.HashString(cacheKey)
+		rng := rand.New(rand.NewPCG(seed, ^seed))
 		a.cyclingFrames = make([][]string, numFrames)
 		offset = 0
 		for i := range a.cyclingFrames {
@@ -219,7 +229,7 @@ func New(opts Settings) *Anim {
 
 				// Also prerender the color with Lip Gloss here to avoid processing
 				// in the render loop.
-				r := availableRunes[rand.IntN(len(availableRunes))]
+				r := availableRunes[rng.IntN(len(availableRunes))]
 				a.cyclingFrames[i][j] = lipgloss.NewStyle().
 					Foreground(ramp[j+offset]).
 					Render(string(r))
@@ -249,10 +259,20 @@ func New(opts Settings) *Anim {
 		animCacheMap.Set(cacheKey, cached)
 	}
 
-	// Random assign a birth to each character for a stagged entrance effect.
-	a.birthOffsets = make([]time.Duration, a.width)
-	for i := range a.birthOffsets {
-		a.birthOffsets[i] = time.Duration(rand.N(int64(maxBirthOffset))) * time.Nanosecond
+	// Assign a deterministic birth step to each column for a
+	// staggered entrance effect. The schedule is seeded off the
+	// spinner id and the settings hash, so two spinners with the
+	// same role and identity stagger identically (this is what
+	// keeps Render() byte-equal across cache hits and across
+	// processes for the same Settings+ID) while spinners with
+	// different ids — distinct assistant messages, different tool
+	// calls, "Thinking" vs "Generating" labels — fade in with
+	// different patterns instead of marching in lock-step.
+	birthSeed := xxh3.HashString(a.id + "|" + cacheKey)
+	birthRng := rand.New(rand.NewPCG(birthSeed, ^birthSeed))
+	a.birthSteps = make([]int, a.width)
+	for i := range a.birthSteps {
+		a.birthSteps[i] = birthRng.IntN(maxBirthSteps)
 	}
 
 	return a
@@ -333,13 +353,14 @@ func (a *Anim) Animate(msg StepMsg) tea.Cmd {
 		a.step.Store(0)
 	}
 
+	frames := a.framesSinceStart.Add(1)
 	if a.initialized.Load() && a.labelWidth > 0 {
 		// Manage the ellipsis animation.
 		ellipsisStep := a.ellipsisStep.Add(1)
 		if int(ellipsisStep) >= ellipsisAnimSpeed*len(ellipsisFrames) {
 			a.ellipsisStep.Store(0)
 		}
-	} else if !a.initialized.Load() && time.Since(a.startTime) >= maxBirthOffset {
+	} else if !a.initialized.Load() && int(frames) >= maxBirthSteps {
 		a.initialized.Store(true)
 	}
 	return a.Step()
@@ -349,10 +370,11 @@ func (a *Anim) Animate(msg StepMsg) tea.Cmd {
 func (a *Anim) Render() string {
 	var b strings.Builder
 	step := int(a.step.Load())
+	frames := int(a.framesSinceStart.Load())
 	for i := range a.width {
 		switch {
-		case !a.initialized.Load() && i < len(a.birthOffsets) && time.Since(a.startTime) < a.birthOffsets[i]:
-			// Birth offset not reached: render initial character.
+		case !a.initialized.Load() && i < len(a.birthSteps) && frames < a.birthSteps[i]:
+			// Birth step not reached: render initial character.
 			b.WriteString(a.initialFrames[step][i])
 		case i < a.cyclingCharWidth:
 			// Render a cycling character.

internal/ui/chat/agent.go 🔗

@@ -53,11 +53,21 @@ func NewAgentToolMessageItem(
 }
 
 // Animate progresses the message animation if it should be spinning.
+//
+// Bumps the parent's F6 list-cache version on both the parent-tick and
+// nested-tick branches. Nested tools are not list entries of their
+// own — their IDs map to this parent's index in idInxMap
+// (internal/ui/model/chat.go:240-246) and their renders are embedded
+// inline in this parent's output — so the list only checks the
+// parent's version. Without the bump, the list cache would serve the
+// previously rendered frame indefinitely and the spinner would appear
+// frozen.
 func (a *AgentToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 	if a.result != nil || a.Status() == ToolStatusCanceled {
 		return nil
 	}
 	if msg.ID == a.ID() {
+		a.Bump()
 		return a.anim.Animate(msg)
 	}
 	for _, nestedTool := range a.nestedTools {
@@ -65,6 +75,7 @@ func (a *AgentToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 			continue
 		}
 		if s, ok := nestedTool.(Animatable); ok {
+			a.Bump()
 			return s.Animate(msg)
 		}
 	}
@@ -77,9 +88,23 @@ func (a *AgentToolMessageItem) NestedTools() []ToolMessageItem {
 }
 
 // SetNestedTools sets the nested tools.
+//
+// SetNestedTools always bumps the version. The previous design
+// deduped when the slice's length and element pointers were
+// unchanged, but the live update path in internal/ui/model/ui.go
+// mutates existing children in place (SetToolCall / SetResult on the
+// same pointers) and then calls SetNestedTools with the same slice.
+// Pointer-equality dedupe in that case skips the parent Bump even
+// though the parent's rendered output (which embeds the children
+// inline) has changed, leaving a stale parent entry in the list
+// cache. Always bumping is cheap (one uint64 increment) and called
+// at most once per agent event; in the rare case the slice is
+// truly unchanged the worst case is one extra parent re-render
+// while every child cache hit stays warm.
 func (a *AgentToolMessageItem) SetNestedTools(tools []ToolMessageItem) {
 	a.nestedTools = tools
 	a.clearCache()
+	a.Bump()
 }
 
 // AddNestedTool adds a nested tool.
@@ -90,6 +115,7 @@ func (a *AgentToolMessageItem) AddNestedTool(tool ToolMessageItem) {
 	}
 	a.nestedTools = append(a.nestedTools, tool)
 	a.clearCache()
+	a.Bump()
 }
 
 // AgentToolRenderContext renders agent tool messages.
@@ -196,15 +222,44 @@ func NewAgenticFetchToolMessageItem(
 	return t
 }
 
+// Animate progresses the message animation if it should be spinning.
+// See [AgentToolMessageItem.Animate] for the parent-bump rationale —
+// without an override, the embedded base.Animate would (a) drop
+// StepMsgs whose ID matches a nested child instead of the parent
+// (anim.Animate's ID check at internal/ui/anim/anim.go:326-329
+// silently returns nil), and (b) never invalidate the parent's
+// list-cache entry on a parent tick.
+func (a *AgenticFetchToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
+	if a.result != nil || a.Status() == ToolStatusCanceled {
+		return nil
+	}
+	if msg.ID == a.ID() {
+		a.Bump()
+		return a.anim.Animate(msg)
+	}
+	for _, nestedTool := range a.nestedTools {
+		if msg.ID != nestedTool.ID() {
+			continue
+		}
+		if s, ok := nestedTool.(Animatable); ok {
+			a.Bump()
+			return s.Animate(msg)
+		}
+	}
+	return nil
+}
+
 // NestedTools returns the nested tools.
 func (a *AgenticFetchToolMessageItem) NestedTools() []ToolMessageItem {
 	return a.nestedTools
 }
 
-// SetNestedTools sets the nested tools.
+// SetNestedTools sets the nested tools. Always bumps the version;
+// see [AgentToolMessageItem.SetNestedTools] for the rationale.
 func (a *AgenticFetchToolMessageItem) SetNestedTools(tools []ToolMessageItem) {
 	a.nestedTools = tools
 	a.clearCache()
+	a.Bump()
 }
 
 // AddNestedTool adds a nested tool.
@@ -215,6 +270,7 @@ func (a *AgenticFetchToolMessageItem) AddNestedTool(tool ToolMessageItem) {
 	}
 	a.nestedTools = append(a.nestedTools, tool)
 	a.clearCache()
+	a.Bump()
 }
 
 // AgenticFetchToolRenderContext renders agentic fetch tool messages.

internal/ui/chat/applyhighlight_callback_test.go 🔗

@@ -0,0 +1,207 @@
+package chat
+
+import (
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/list"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// renderCountingItem wraps a real chat item and counts Render calls
+// to expose the list-level cache behaviour to tests. The wrapper
+// forwards the list.Item methods exercised by this test — Render,
+// Version, Finished — plus the list.Highlightable surface
+// (SetHighlight / Highlight) used by the callback-driven scenario.
+// Focus is not exercised here, so list.Focusable is not forwarded;
+// add SetFocused/IsFocused if a future test needs to drive focus
+// through the wrapper.
+type renderCountingItem struct {
+	inner       MessageItem
+	renderHits  int
+	highlightCb func(start [4]int)
+}
+
+func newRenderCountingItem(inner MessageItem) *renderCountingItem {
+	return &renderCountingItem{inner: inner}
+}
+
+func (r *renderCountingItem) Render(width int) string {
+	r.renderHits++
+	return r.inner.Render(width)
+}
+
+func (r *renderCountingItem) Version() uint64 {
+	return r.inner.(versionedItem).Version()
+}
+
+func (r *renderCountingItem) Finished() bool {
+	return r.inner.Finished()
+}
+
+// SetHighlight forwards to the embedded item; the underlying
+// highlightableMessageItem dedupes equivalent ranges and bumps the
+// shared version on observable change.
+func (r *renderCountingItem) SetHighlight(startLine, startCol, endLine, endCol int) {
+	if h, ok := r.inner.(list.Highlightable); ok {
+		h.SetHighlight(startLine, startCol, endLine, endCol)
+		if r.highlightCb != nil {
+			r.highlightCb([4]int{startLine, startCol, endLine, endCol})
+		}
+	}
+}
+
+func (r *renderCountingItem) Highlight() (int, int, int, int) {
+	if h, ok := r.inner.(list.Highlightable); ok {
+		return h.Highlight()
+	}
+	return -1, -1, -1, -1
+}
+
+// TestList_CallbackDrivenHighlightUnfreezeAndReFreeze covers F6
+// §4.5.1 along the live applyHighlightRange path. Instead of
+// driving BeginSelectionDrag directly, the test registers a render
+// callback that mutates the chat items' highlight ranges (just like
+// Chat.applyHighlightRange does in production) and verifies the
+// resulting cache behaviour:
+//
+//   - Items inside the active range pick up a SetHighlight call,
+//     their version bumps, the F6 cache invalidates, and the list
+//     re-renders them on the next draw. The post-render entry is
+//     frozen again because the items are Finished() — but their
+//     stored output now reflects the highlight.
+//   - Subsequent draws while the range is unchanged are cache hits:
+//     the callback's SetHighlight call dedupes (same range), the
+//     version is stable, and the list serves the previous output
+//     verbatim without calling Render.
+//   - When the range moves OFF an item, the callback clears the
+//     highlight, the version bumps, and the item re-renders. After
+//     that single re-render the entry re-freezes; further draws are
+//     cache hits.
+func TestList_CallbackDrivenHighlightUnfreezeAndReFreeze(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+
+	// Build three finished assistant messages so all three are
+	// candidates for freezing. Real items (per Round 2 spec) — the
+	// surrounding renderCountingItem wrapper just lets the test see
+	// per-item Render calls.
+	mk := func(id, body string) *renderCountingItem {
+		msg := &message.Message{
+			ID:   id,
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   "thinking",
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: body},
+				message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+			},
+		}
+		inner := NewAssistantMessageItem(&sty, msg)
+		require.True(t, inner.Finished(), "test fixture must be Finished()")
+		return newRenderCountingItem(inner)
+	}
+
+	a := mk("a", "alpha")
+	b := mk("b", "bravo")
+	c := mk("c", "charlie")
+
+	l := list.NewList(a, b, c)
+	l.SetSize(80, 30)
+
+	// activeRange holds the inclusive [start, end] item indexes the
+	// callback should highlight. -1 means no active selection.
+	activeRange := [2]int{-1, -1}
+
+	cb := func(idx, _ int, item list.Item) list.Item {
+		hi, ok := item.(list.Highlightable)
+		if !ok {
+			return item
+		}
+		if activeRange[0] >= 0 && idx >= activeRange[0] && idx <= activeRange[1] {
+			// Inside the range: highlight the entire item.
+			hi.SetHighlight(0, 0, -1, -1)
+		} else {
+			// Outside the range: clear highlight.
+			hi.SetHighlight(-1, -1, -1, -1)
+		}
+		return item
+	}
+	l.RegisterRenderCallback(cb)
+
+	// First render populates the cache. Each item renders exactly
+	// once even though the callback runs for all three.
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "first render: a renders once")
+	require.Equal(t, 1, b.renderHits, "first render: b renders once")
+	require.Equal(t, 1, c.renderHits, "first render: c renders once")
+
+	// Subsequent renders without an active range are cache hits.
+	// The callback's SetHighlight call dedupes (already cleared),
+	// no version bump, frozen entries served verbatim.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 1, a.renderHits, "frozen item must not re-render across stable draws")
+	require.Equal(t, 1, b.renderHits, "frozen item must not re-render across stable draws")
+	require.Equal(t, 1, c.renderHits, "frozen item must not re-render across stable draws")
+
+	// Activate a selection range over items a and b. The callback
+	// will SetHighlight on both during the next render, bumping
+	// their versions. The cache hit fails (version mismatch) and
+	// each in-range item re-renders exactly once.
+	activeRange = [2]int{0, 1}
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "in-range item must re-render after SetHighlight")
+	require.Equal(t, 2, b.renderHits, "in-range item must re-render after SetHighlight")
+	require.Equal(t, 1, c.renderHits, "out-of-range item stays frozen")
+
+	// Verify the highlight actually landed on the in-range items.
+	sLine, _, eLine, _ := a.Highlight()
+	require.Equal(t, 0, sLine)
+	require.Equal(t, -1, eLine)
+	sLine, _, eLine, _ = c.Highlight()
+	require.Equal(t, -1, sLine, "out-of-range item must not be highlighted")
+	require.Equal(t, -1, eLine)
+
+	// While the range stays the same, subsequent renders are cache
+	// hits. The callback dedupes (same range), no version bump,
+	// the post-render entry served verbatim. Note: items are
+	// re-frozen because they're still Finished() and not in the
+	// list's freezeSuppressed set.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 2, a.renderHits, "in-range item re-freezes after the highlight render")
+	require.Equal(t, 2, b.renderHits, "in-range item re-freezes after the highlight render")
+	require.Equal(t, 1, c.renderHits, "out-of-range item stays frozen")
+
+	// Move the range off the items entirely. The callback clears
+	// each in-range item's highlight back to (-1,-1,-1,-1), which
+	// bumps their versions and triggers exactly one re-render
+	// each. After that, the entries re-freeze.
+	activeRange = [2]int{-1, -1}
+	_ = l.Render()
+	require.Equal(t, 3, a.renderHits, "exiting-range item must re-render once when highlight clears")
+	require.Equal(t, 3, b.renderHits, "exiting-range item must re-render once when highlight clears")
+	require.Equal(t, 1, c.renderHits, "never-highlighted item stays frozen")
+
+	// Confirm the highlight has been fully cleared.
+	sLine, _, eLine, _ = a.Highlight()
+	require.Equal(t, -1, sLine)
+	require.Equal(t, -1, eLine)
+
+	// And subsequent renders are cache hits again — the items
+	// re-froze.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 3, a.renderHits, "re-frozen item must not re-render across stable draws")
+	require.Equal(t, 3, b.renderHits, "re-frozen item must not re-render across stable draws")
+	require.Equal(t, 1, c.renderHits, "never-highlighted item stays frozen")
+}

internal/ui/chat/assistant.go 🔗

@@ -1,7 +1,9 @@
 package chat
 
 import (
+	"encoding/binary"
 	"fmt"
+	"hash/fnv"
 	"strings"
 
 	tea "charm.land/bubbletea/v2"
@@ -9,21 +11,116 @@ import (
 	"github.com/charmbracelet/crush/internal/message"
 	"github.com/charmbracelet/crush/internal/ui/anim"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/charmbracelet/x/ansi"
 )
 
 // assistantMessageTruncateFormat is the text shown when an assistant message is
-// truncated.
+// truncated in the collapsed state.
 const assistantMessageTruncateFormat = "… (%d lines hidden) [click or space to expand]"
 
+// assistantMessageTailWindowFormat is shown above a tail-windowed thinking
+// block to advertise that earlier lines exist and that the user can
+// promote the view to a full expansion. The promotion is wired through
+// the existing ToggleExpanded path (click / space) — F5 deliberately
+// does not add a new keybinding.
+const assistantMessageTailWindowFormat = "… %d earlier lines hidden [click or space for full view]"
+
 // maxCollapsedThinkingHeight defines the maximum height of the thinking
 const maxCollapsedThinkingHeight = 10
 
+// maxExpandedThinkingTailLines is the F5 tail-window cap. When the user
+// expands a thinking block whose post-glamour line count exceeds this
+// threshold, only the last N lines are shown with an affordance line
+// indicating how many earlier lines are hidden. Clicking / pressing
+// space again promotes the view to a full expansion. The slice is
+// taken AFTER glamour render (not before) so fenced code blocks,
+// lists, and tables are not torn at arbitrary boundaries.
+const maxExpandedThinkingTailLines = 200
+
+// thinkingViewMode is the F5 three-state view machine for the thinking
+// block. ToggleExpanded cycles
+// collapsed → tail-window → full-expanded → collapsed, skipping the
+// tail-window step when the rendered thinking fits within the cap so
+// short blocks still toggle in two clicks.
+type thinkingViewMode uint8
+
+const (
+	thinkingCollapsed thinkingViewMode = iota
+	thinkingTailWindow
+	thinkingFullExpanded
+)
+
+// assistantSection is a per-section render cache for AssistantMessageItem.
+// Each section (thinking, content, error) carries its own keys so that
+// streaming a section does not invalidate a different — often more
+// expensive — section's cached render. srcHash is an FNV-64 of the
+// section's source text; extra captures any other state that changes
+// the rendered output (e.g. thinkingExpanded, the thinking footer
+// inputs). valid disambiguates a real cache hit from the zero value
+// when both source text and extras hash to zero. aux carries any
+// per-section side data that the caller needs to recover on a hit
+// (e.g. the thinking box height for click detection).
+type assistantSection struct {
+	width   int
+	srcHash uint64
+	extra   uint64
+	out     string
+	h       int
+	aux     int
+	valid   bool
+}
+
+// hit reports whether the cache entry matches the requested key.
+func (s *assistantSection) hit(width int, srcHash, extra uint64) bool {
+	return s.valid && s.width == width && s.srcHash == srcHash && s.extra == extra
+}
+
+// store records the rendered output under the given key.
+func (s *assistantSection) store(width int, srcHash, extra uint64, out string, aux int) {
+	s.width = width
+	s.srcHash = srcHash
+	s.extra = extra
+	s.out = out
+	s.h = lipgloss.Height(out)
+	s.aux = aux
+	s.valid = true
+}
+
+// reset drops the cached output.
+func (s *assistantSection) reset() {
+	*s = assistantSection{}
+}
+
+// fnv64 hashes a single string with FNV-64.
+func fnv64(s string) uint64 {
+	h := fnv.New64a()
+	_, _ = h.Write([]byte(s))
+	return h.Sum64()
+}
+
+// fnvFields hashes a list of byte fields with length-prefix framing
+// so that no concatenation collision can occur between distinct
+// field tuples (a NUL inside one field cannot impersonate a
+// boundary between two fields). Each field is preceded by its
+// length encoded as 8 bytes little-endian.
+func fnvFields(fields ...[]byte) uint64 {
+	h := fnv.New64a()
+	var lenBuf [8]byte
+	for _, f := range fields {
+		binary.LittleEndian.PutUint64(lenBuf[:], uint64(len(f)))
+		_, _ = h.Write(lenBuf[:])
+		_, _ = h.Write(f)
+	}
+	return h.Sum64()
+}
+
 // AssistantMessageItem represents an assistant message in the chat UI.
 //
 // This item includes thinking, and the content but does not include the tool calls.
 type AssistantMessageItem struct {
+	*list.Versioned
 	*highlightableMessageItem
 	*cachedMessageItem
 	*focusableMessageItem
@@ -31,18 +128,34 @@ type AssistantMessageItem struct {
 	message           *message.Message
 	sty               *styles.Styles
 	anim              *anim.Anim
-	thinkingExpanded  bool
+	thinkingViewMode  thinkingViewMode
 	thinkingBoxHeight int // Tracks the rendered thinking box height for click detection.
+
+	// Per-section render caches. Splitting these out means content
+	// streaming does not invalidate the (often expensive) thinking
+	// render, and vice versa.
+	thinkingSec assistantSection
+	contentSec  assistantSection
+	errorSec    assistantSection
+
+	// streamingContent caches a "stable prefix" glamour render of
+	// the assistant content body so each streaming flush only
+	// re-renders the trailing partial. F8 of
+	// docs/notes/2026-05-12-chat-rendering-perf.md. See
+	// streaming_markdown.go for the full algorithm.
+	streamingContent streamingMarkdown
 }
 
 var _ Expandable = (*AssistantMessageItem)(nil)
 
 // NewAssistantMessageItem creates a new AssistantMessageItem.
 func NewAssistantMessageItem(sty *styles.Styles, message *message.Message) MessageItem {
+	v := list.NewVersioned()
 	a := &AssistantMessageItem{
-		highlightableMessageItem: defaultHighlighter(sty),
+		Versioned:                v,
+		highlightableMessageItem: defaultHighlighter(sty, v),
 		cachedMessageItem:        &cachedMessageItem{},
-		focusableMessageItem:     &focusableMessageItem{},
+		focusableMessageItem:     newFocusableMessageItem(v),
 		message:                  message,
 		sty:                      sty,
 	}
@@ -71,6 +184,13 @@ func (a *AssistantMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 	if !a.isSpinning() {
 		return nil
 	}
+	// Bump the F6 list-cache version so the next draw re-renders
+	// this item: a spinner tick mutates anim's internal frame
+	// counter, which changes the rendered output but is invisible
+	// to the per-section content hashes. Without the bump the
+	// list cache would serve the previously rendered frame
+	// indefinitely and the spinner would appear frozen.
+	a.Bump()
 	return a.anim.Animate(msg)
 }
 
@@ -88,14 +208,7 @@ func (a *AssistantMessageItem) RawRender(width int) string {
 		spinner = a.renderSpinning()
 	}
 
-	content, height, ok := a.getCachedRender(cappedWidth)
-	if !ok {
-		content = a.renderMessageContent(cappedWidth)
-		height = lipgloss.Height(content)
-		// cache the rendered content
-		a.setCachedRender(content, cappedWidth, height)
-	}
-
+	content, height := a.renderMessageContent(cappedWidth)
 	highlightedContent := a.renderHighlighted(content, cappedWidth, height)
 	if spinner != "" {
 		if highlightedContent != "" {
@@ -114,6 +227,23 @@ func (a *AssistantMessageItem) Render(width int) string {
 	// it's wrapping logic.
 	// We already know that the content is wrapped to the correct width in
 	// RawRender, so we can just apply the styles directly to each line.
+	//
+	// The split + per-line prefix loop is O(L); cache the result keyed
+	// by (width, focused, sectionsFingerprint) so steady-state Render
+	// becomes a pointer return. The sectionsFingerprint folds in the
+	// per-section srcHash/extra so that any sub-cache change
+	// invalidates this prefix cache without requiring an explicit
+	// drop. Bypass the cache while spinning (RawRender's spinner
+	// suffix changes every animation frame) or while a highlight
+	// range is active (selection drag).
+	useCache := !a.isSpinning() && !a.isHighlighted()
+	cappedWidth := cappedMessageWidth(width)
+	key := a.prefixCacheKey(cappedWidth)
+	if useCache {
+		if cached, ok := a.getCachedPrefixedRender(width, key); ok {
+			return cached
+		}
+	}
 	focused := a.sty.Messages.AssistantFocused.Render()
 	blurred := a.sty.Messages.AssistantBlurred.Render()
 	rendered := a.RawRender(width)
@@ -125,45 +255,202 @@ func (a *AssistantMessageItem) Render(width int) string {
 			lines[i] = blurred + line
 		}
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	if useCache {
+		a.setCachedPrefixedRender(out, width, key)
+	}
+	return out
+}
+
+// prefixCacheKey builds the F3 prefixed-render cache key. We pack the
+// focus bit into bit 0 and a fingerprint of the section caches into
+// the upper bits, so any change to a sub-section's source text or
+// extras forces the prefix cache to miss without needing an explicit
+// drop. cappedWidth is included so a cached prefix never survives a
+// section-cache miss caused by a width change. The finish reason is
+// folded in too because it controls the composition of
+// renderMessageContent (e.g. appending the constant "Canceled"
+// string) — that decision lives outside any section's own hash.
+func (a *AssistantMessageItem) prefixCacheKey(cappedWidth int) uint64 {
+	thinkSrc, thinkExtra := a.thinkingKey()
+	contentSrc, contentExtra := a.contentKey()
+	errSrc, errExtra := a.errorKey()
+	h := fnv.New64a()
+	var buf [8]byte
+	writeU64 := func(v uint64) {
+		for i := range 8 {
+			buf[i] = byte(v >> (8 * i))
+		}
+		_, _ = h.Write(buf[:])
+	}
+	writeU64(uint64(cappedWidth))
+	writeU64(thinkSrc)
+	writeU64(thinkExtra)
+	writeU64(contentSrc)
+	writeU64(contentExtra)
+	writeU64(errSrc)
+	writeU64(errExtra)
+	writeU64(a.compositionKey())
+	fingerprint := h.Sum64()
+	var focusBit uint64
+	if a.focused {
+		focusBit = 1
+	}
+	return (fingerprint &^ 1) | focusBit
 }
 
-// renderMessageContent renders the message content including thinking, main content, and finish reason.
-func (a *AssistantMessageItem) renderMessageContent(width int) string {
+// compositionKey hashes the inputs to renderMessageContent's structural
+// decisions (which sections to include, whether to append the
+// constant "Canceled" footer) so that flipping IsFinished or the
+// finish reason invalidates the prefix cache even when no section's
+// own source text changed.
+func (a *AssistantMessageItem) compositionKey() uint64 {
+	var finishedFlag byte
+	var reason string
+	if a.message.IsFinished() {
+		finishedFlag = 1
+		reason = string(a.message.FinishReason())
+	}
+	// Length-prefixed framing keeps the finished flag and the reason
+	// string from blending into one another.
+	return fnvFields([]byte{finishedFlag}, []byte(reason))
+}
+
+// renderMessageContent renders the message content including thinking, main
+// content, and finish reason. Each section is served from its own cache;
+// only the section whose source text or extras changed since the last
+// render is recomputed.
+func (a *AssistantMessageItem) renderMessageContent(width int) (string, int) {
 	var messageParts []string
 	thinking := strings.TrimSpace(a.message.ReasoningContent().Thinking)
 	content := strings.TrimSpace(a.message.Content().Text)
-	// if the massage has reasoning content add that first
+
 	if thinking != "" {
-		messageParts = append(messageParts, a.renderThinking(a.message.ReasoningContent().Thinking, width))
+		messageParts = append(messageParts, a.cachedThinking(width))
 	}
 
-	// then add the main content
 	if content != "" {
-		// add a spacer between thinking and content
 		if thinking != "" {
 			messageParts = append(messageParts, "")
 		}
-		messageParts = append(messageParts, a.renderMarkdown(content, width))
+		messageParts = append(messageParts, a.cachedContent(width))
 	}
 
-	// finally add any finish reason info
 	if a.message.IsFinished() {
 		switch a.message.FinishReason() {
 		case message.FinishReasonCanceled:
 			messageParts = append(messageParts, a.sty.Messages.AssistantCanceled.Render("Canceled"))
 		case message.FinishReasonError:
-			messageParts = append(messageParts, a.renderError(width))
+			messageParts = append(messageParts, a.cachedError(width))
 		}
 	}
 
-	return strings.Join(messageParts, "\n")
+	out := strings.Join(messageParts, "\n")
+	return out, lipgloss.Height(out)
+}
+
+// thinkingKey returns the (srcHash, extra) cache key components for the
+// thinking section. extra folds in everything other than the raw
+// thinking text that affects the rendered output: the view mode
+// (collapsed / tail-window / full) and the footer state (which
+// depends on IsThinking, ToolCalls, and ThinkingDuration).
+func (a *AssistantMessageItem) thinkingKey() (uint64, uint64) {
+	thinking := a.message.ReasoningContent().Thinking
+	srcHash := fnv64(thinking)
+
+	showFooter := !a.message.IsThinking() || len(a.message.ToolCalls()) > 0
+	var durationStr string
+	if showFooter {
+		duration := a.message.ThinkingDuration()
+		if duration.String() != "0s" {
+			durationStr = duration.String()
+		}
+	}
+	var footer byte
+	if showFooter {
+		footer = 1
+	}
+	// Length-prefixed framing avoids any delimiter collision between
+	// the flag bytes and the duration string. The view mode is folded
+	// in so that toggling collapsed ↔ tail-window ↔ full invalidates
+	// only the thinking section, not content/error.
+	extra := fnvFields([]byte{byte(a.thinkingViewMode), footer}, []byte(durationStr))
+	return srcHash, extra
+}
+
+// contentKey returns the (srcHash, extra) cache key components for the
+// main content section.
+func (a *AssistantMessageItem) contentKey() (uint64, uint64) {
+	return fnv64(a.message.Content().Text), 0
+}
+
+// errorKey returns the (srcHash, extra) cache key components for the
+// error section. Returns (0, 0) when no error is present so the cache
+// stays a no-op for non-error messages.
+func (a *AssistantMessageItem) errorKey() (uint64, uint64) {
+	if !a.message.IsFinished() || a.message.FinishReason() != message.FinishReasonError {
+		return 0, 0
+	}
+	finishPart := a.message.FinishPart()
+	if finishPart == nil {
+		return 0, 0
+	}
+	// Length-prefixed framing prevents Message+Details collisions
+	// between distinct (Message, Details) tuples that would
+	// otherwise concatenate to the same byte sequence.
+	return fnvFields([]byte(finishPart.Message), []byte(finishPart.Details)), 0
+}
+
+// cachedThinking returns the rendered thinking section, computing and
+// caching it on miss. The thinking-box height (used for click target
+// detection) is preserved across hits via assistantSection.aux so the
+// cached path never desyncs click detection.
+func (a *AssistantMessageItem) cachedThinking(width int) string {
+	srcHash, extra := a.thinkingKey()
+	if a.thinkingSec.hit(width, srcHash, extra) {
+		a.thinkingBoxHeight = a.thinkingSec.aux
+		return a.thinkingSec.out
+	}
+	out := a.renderThinking(a.message.ReasoningContent().Thinking, width)
+	a.thinkingSec.store(width, srcHash, extra, out, a.thinkingBoxHeight)
+	return out
+}
+
+// cachedContent returns the rendered content section.
+func (a *AssistantMessageItem) cachedContent(width int) string {
+	srcHash, extra := a.contentKey()
+	if a.contentSec.hit(width, srcHash, extra) {
+		return a.contentSec.out
+	}
+	out := a.renderMarkdown(a.message.Content().Text, width)
+	a.contentSec.store(width, srcHash, extra, out, 0)
+	return out
+}
+
+// cachedError returns the rendered error section.
+func (a *AssistantMessageItem) cachedError(width int) string {
+	srcHash, extra := a.errorKey()
+	if a.errorSec.hit(width, srcHash, extra) {
+		return a.errorSec.out
+	}
+	out := a.renderError(width)
+	a.errorSec.store(width, srcHash, extra, out, 0)
+	return out
 }
 
 // renderThinking renders the thinking/reasoning content with footer.
+//
+// Slicing happens AFTER glamour rendering so fenced code blocks, list
+// continuations, and tables are not split mid-block — the same
+// boundary problem §4.4 of the design note flags. The bordered
+// ThinkingBox style is applied on top of the (already-windowed)
+// lines so the visual box matches what the user sees today.
 func (a *AssistantMessageItem) renderThinking(thinking string, width int) string {
 	renderer := common.QuietMarkdownRenderer(a.sty, width)
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
 	rendered, err := renderer.Render(thinking)
+	mu.Unlock()
 	if err != nil {
 		rendered = thinking
 	}
@@ -172,13 +459,23 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string
 	lines := strings.Split(rendered, "\n")
 	totalLines := len(lines)
 
-	isTruncated := totalLines > maxCollapsedThinkingHeight
-	if !a.thinkingExpanded && isTruncated {
-		lines = lines[totalLines-maxCollapsedThinkingHeight:]
-		hint := a.sty.Messages.ThinkingTruncationHint.Render(
-			fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight),
-		)
-		lines = append([]string{hint, ""}, lines...)
+	switch a.thinkingViewMode {
+	case thinkingCollapsed:
+		if totalLines > maxCollapsedThinkingHeight {
+			lines = lines[totalLines-maxCollapsedThinkingHeight:]
+			hint := a.sty.Messages.ThinkingTruncationHint.Render(
+				fmt.Sprintf(assistantMessageTruncateFormat, totalLines-maxCollapsedThinkingHeight),
+			)
+			lines = append([]string{hint, ""}, lines...)
+		}
+	case thinkingTailWindow:
+		if totalLines > maxExpandedThinkingTailLines {
+			lines = lines[totalLines-maxExpandedThinkingTailLines:]
+			hint := a.sty.Messages.ThinkingTruncationHint.Render(
+				fmt.Sprintf(assistantMessageTailWindowFormat, totalLines-maxExpandedThinkingTailLines),
+			)
+			lines = append([]string{hint, ""}, lines...)
+		}
 	}
 
 	thinkingStyle := a.sty.Messages.ThinkingBox.Width(width)
@@ -202,14 +499,18 @@ func (a *AssistantMessageItem) renderThinking(thinking string, width int) string
 	return result
 }
 
-// renderMarkdown renders content as markdown.
+// renderMarkdown renders content as markdown. F8 routes the call
+// through streamingContent, which caches the glamour render of a
+// "stable prefix" so each streaming flush only re-renders the
+// trailing partial. The streaming cache invalidates itself on
+// width change and on any content that is not a prefix-extension
+// of the previously rendered content (e.g. user retried the
+// turn), and falls back to a full render whenever boundary
+// detection has the slightest doubt — see
+// findSafeMarkdownBoundary.
 func (a *AssistantMessageItem) renderMarkdown(content string, width int) string {
 	renderer := common.MarkdownRenderer(a.sty, width)
-	result, err := renderer.Render(content)
-	if err != nil {
-		return content
-	}
-	return strings.TrimSuffix(result, "\n")
+	return a.streamingContent.Render(content, width, renderer)
 }
 
 func (a *AssistantMessageItem) renderSpinning() string {
@@ -240,23 +541,107 @@ func (a *AssistantMessageItem) isSpinning() bool {
 	return (isThinking || !isFinished) && !hasContent && !hasToolCalls
 }
 
-// SetMessage is used to update the underlying message.
-func (a *AssistantMessageItem) SetMessage(message *message.Message) tea.Cmd {
+// SetMessage is used to update the underlying message. Only the
+// sub-section caches whose source text or extras changed are
+// invalidated; the others survive and serve cache hits on the next
+// RawRender.
+func (a *AssistantMessageItem) SetMessage(msg *message.Message) tea.Cmd {
 	wasSpinning := a.isSpinning()
-	a.message = message
-	a.clearCache()
+	a.message = msg
+	// Bump the F6 version even if the underlying *message.Message
+	// pointer is identical: callers may have mutated the message in
+	// place (delta append) and we cannot tell from here. The
+	// per-section caches dedupe identical content via FNV-64 hashes,
+	// so a redundant bump only costs one list-cache repopulation.
+	a.Bump()
+	// The prefix cache is keyed by a fingerprint that includes every
+	// section's source hash, so an unchanged section keeps its prefix
+	// cache valid while a changed section forces a miss naturally.
+	// Section caches themselves are content-keyed, so they do not
+	// need an explicit drop here either.
 	if !wasSpinning && a.isSpinning() {
 		return a.StartAnimation()
 	}
 	return nil
 }
 
-// ToggleExpanded toggles the expanded state of the thinking box and returns
-// whether the item is now expanded.
+// Finished implements list.Item. The assistant message is freezable
+// once the message reports IsFinished() and is no longer spinning
+// (no animation tick remains pending). Streaming tail animation is
+// caught by isSpinning, so freezing only kicks in once the turn is
+// fully terminal. The list cache invalidates the entry on the next
+// version bump if anything (focus, highlight, expansion) changes.
+func (a *AssistantMessageItem) Finished() bool {
+	return a.message.IsFinished() && !a.isSpinning()
+}
+
+// clearCache drops every cached render for this item, including the
+// per-section caches. Shadows the embedded cachedMessageItem.clearCache
+// so ClearItemCaches (style change) wipes the section caches too.
+// F8: also drop the streaming-markdown stable-prefix cache because
+// the cached glamour render embeds the OLD style's ANSI sequences
+// and is no longer visually consistent with the new style.
+func (a *AssistantMessageItem) clearCache() {
+	a.cachedMessageItem.clearCache()
+	a.thinkingSec.reset()
+	a.contentSec.reset()
+	a.errorSec.reset()
+	a.streamingContent.Reset()
+}
+
+// ToggleExpanded advances the F5 thinking view-mode cycle and returns
+// whether the item is now in any expanded state (tail-window or full).
+// The cycle is collapsed → tail-window → full → collapsed, with the
+// tail-window step skipped when the rendered thinking fits within
+// maxExpandedThinkingTailLines so short blocks remain a two-click
+// toggle. Both the thinking section cache and the F3 prefix cache
+// fold thinkingViewMode into their keys, so no explicit invalidation
+// is required here.
+//
+// When the message carries no thinking text the toggle is a no-op:
+// there is nothing to expand, and mutating the view mode would
+// thrash the thinking-section cache key for no visible benefit.
 func (a *AssistantMessageItem) ToggleExpanded() bool {
-	a.thinkingExpanded = !a.thinkingExpanded
-	a.clearCache()
-	return a.thinkingExpanded
+	if strings.TrimSpace(a.message.ReasoningContent().Thinking) == "" {
+		return a.thinkingViewMode != thinkingCollapsed
+	}
+	switch a.thinkingViewMode {
+	case thinkingCollapsed:
+		if a.tailWindowWouldTruncate() {
+			a.thinkingViewMode = thinkingTailWindow
+		} else {
+			a.thinkingViewMode = thinkingFullExpanded
+		}
+	case thinkingTailWindow:
+		a.thinkingViewMode = thinkingFullExpanded
+	case thinkingFullExpanded:
+		a.thinkingViewMode = thinkingCollapsed
+	}
+	a.Bump()
+	return a.thinkingViewMode != thinkingCollapsed
+}
+
+// tailWindowWouldTruncate reports whether the current thinking text
+// is long enough that the tail-window step is worth inserting into
+// the toggle cycle. We use a cheap source-text logical-line count
+// as the heuristic rather than peeking into the cache: the cache
+// may be populated in collapsed state (where its height is bounded
+// by maxCollapsedThinkingHeight and tells us nothing about the
+// underlying length), and re-running glamour just to count lines
+// would defeat the cache. The heuristic can over-trigger (a source
+// with many short lines may wrap to fewer than N lines), in which
+// case the tail-window render is visually identical to full and
+// the cycle costs the user one extra toggle — preferred over the
+// alternative of failing to show the affordance on a genuinely
+// long block.
+//
+// Logical line count is `1 + newlineCount` (a string with no
+// newlines is one line). Comparing newline count alone introduced
+// an off-by-one that let a source whose post-newline-split length
+// equalled the cap skip the tail-window step.
+func (a *AssistantMessageItem) tailWindowWouldTruncate() bool {
+	lineCount := 1 + strings.Count(a.message.ReasoningContent().Thinking, "\n")
+	return lineCount > maxExpandedThinkingTailLines
 }
 
 // HandleMouseClick implements MouseClickable. It signals (via a true return)

internal/ui/chat/assistant_section_cache_test.go 🔗

@@ -0,0 +1,464 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// Fixed Unix timestamps for deterministic cache-equality tests. The
+// thinking section's `extra` hash folds in ThinkingDuration, which
+// in turn depends on (FinishedAt - StartedAt). Anchoring both
+// timestamps removes any wall-clock dependency from the cache key
+// so two builds across a second boundary still hit the cache.
+const (
+	testStartedAt  int64 = 1_700_000_000
+	testFinishedAt int64 = 1_700_000_005
+	testFinishTime int64 = 1_700_000_006
+)
+
+// thinkingMessage builds an assistant message with a fixed reasoning
+// content and an optional text content. When text is empty the
+// message represents a still-thinking turn (matches IsThinking()).
+// Both reasoning timestamps are anchored to fixed Unix seconds so
+// ThinkingDuration is deterministic and cache-equality assertions
+// don't depend on wall-clock time.
+func thinkingMessage(id, thinking, text string) *message.Message {
+	parts := []message.ContentPart{
+		message.ReasoningContent{
+			Thinking:   thinking,
+			StartedAt:  testStartedAt,
+			FinishedAt: testFinishedAt,
+		},
+	}
+	if text != "" {
+		parts = append(parts, message.TextContent{Text: text})
+	}
+	return &message.Message{
+		ID:    id,
+		Role:  message.Assistant,
+		Parts: parts,
+	}
+}
+
+// errorMessage builds a finished assistant message whose finish part
+// carries an error reason plus a custom message and details.
+func errorMessage(id, errMsg, errDetails string) *message.Message {
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "partial output"},
+			message.Finish{
+				Reason:  message.FinishReasonError,
+				Message: errMsg,
+				Details: errDetails,
+				Time:    testFinishTime,
+			},
+		},
+	}
+}
+
+// renderTwoSetMessages drives a SetMessage cycle and returns the
+// section-cache identity (out string pointers via direct comparison
+// of the cached fields). The test compares `out` strings; identical
+// output across cycles is the cache-hit indicator we rely on.
+type sectionSnapshot struct {
+	thinking string
+	content  string
+	errSec   string
+}
+
+func snapshot(a *AssistantMessageItem) sectionSnapshot {
+	return sectionSnapshot{
+		thinking: a.thinkingSec.out,
+		content:  a.contentSec.out,
+		errSec:   a.errorSec.out,
+	}
+}
+
+// TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking covers
+// the central F4 invariant: streaming the main content through SetMessage
+// must keep the cached thinking render intact, provided the inputs to
+// the thinking section render (text, expanded flag, footer state) are
+// unchanged. We seed an already-non-empty content so that IsThinking()
+// is false on both renders — that's the steady streaming state where
+// the thinking block has finished and content keeps growing.
+func TestAssistantSectionCache_ContentChangeDoesNotInvalidateThinking(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	thinking := "Step 1\nStep 2\nStep 3"
+	msg := thinkingMessage("a1", thinking, "Initial answer.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 71
+
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	require.NotEmpty(t, first.thinking, "thinking section must be populated after first render")
+
+	// Stream more content into the existing turn. Thinking text and
+	// footer state are byte-identical between the two renders.
+	updated := thinkingMessage("a1", thinking, "Initial answer. More streamed text.")
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking,
+		"content streaming must not invalidate the thinking section render")
+	require.NotEqual(t, first.content, second.content,
+		"content section must have been re-rendered")
+}
+
+// TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent is the
+// mirror of the previous test: extending thinking text must not force a
+// re-render of the content section.
+func TestAssistantSectionCache_ThinkingChangeDoesNotInvalidateContent(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	content := "Final answer goes here."
+	msg := thinkingMessage("a2", "Step 1", content)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 73
+
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	require.NotEmpty(t, first.content)
+
+	updated := thinkingMessage("a2", "Step 1\nStep 2", content)
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.content, second.content,
+		"thinking streaming must not invalidate the content section render")
+	require.NotEqual(t, first.thinking, second.thinking,
+		"thinking text changed; thinking section must have re-rendered")
+}
+
+// TestAssistantSectionCache_HashKeyDiscrimination asserts that two
+// messages with different source text hash to different per-section
+// keys, and that messages with identical source text hit the cache.
+func TestAssistantSectionCache_HashKeyDiscrimination(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	msgA := thinkingMessage("a3", "thinking A", "content A")
+	msgB := thinkingMessage("a3", "thinking B", "content B")
+
+	itemA := NewAssistantMessageItem(&sty, msgA).(*AssistantMessageItem)
+	itemB := NewAssistantMessageItem(&sty, msgB).(*AssistantMessageItem)
+
+	thinkSrcA, _ := itemA.thinkingKey()
+	thinkSrcB, _ := itemB.thinkingKey()
+	require.NotEqual(t, thinkSrcA, thinkSrcB,
+		"distinct thinking text must produce distinct FNV-64 source hashes")
+
+	contentSrcA, _ := itemA.contentKey()
+	contentSrcB, _ := itemB.contentKey()
+	require.NotEqual(t, contentSrcA, contentSrcB,
+		"distinct content text must produce distinct FNV-64 source hashes")
+
+	// Identical source text on a fresh item must produce the same
+	// hashes — keying invariant for cache hits.
+	itemAClone := NewAssistantMessageItem(&sty, thinkingMessage("a3", "thinking A", "content A")).(*AssistantMessageItem)
+	thinkSrcAClone, _ := itemAClone.thinkingKey()
+	contentSrcAClone, _ := itemAClone.contentKey()
+	require.Equal(t, thinkSrcA, thinkSrcAClone)
+	require.Equal(t, contentSrcA, contentSrcAClone)
+}
+
+// TestAssistantSectionCache_CloneRoundTrip guards the contract that
+// message.Clone() does not invalidate any section cache: re-keying off
+// the cloned message must produce identical hashes and the section
+// caches must serve byte-identical renders.
+func TestAssistantSectionCache_CloneRoundTrip(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	orig := thinkingMessage("a4", "Reasoning step.", "Answer text.")
+	item := NewAssistantMessageItem(&sty, orig).(*AssistantMessageItem)
+
+	const width = 75
+	_ = item.RawRender(width)
+	first := snapshot(item)
+
+	cloned := orig.Clone()
+	item.SetMessage(&cloned)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking, "clone must hit the thinking cache")
+	require.Equal(t, first.content, second.content, "clone must hit the content cache")
+}
+
+// TestAssistantSectionCache_ResizeInvalidatesAll asserts that a width
+// change forces a re-render of every section.
+func TestAssistantSectionCache_ResizeInvalidatesAll(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	msg := errorMessage("a5", "boom", strings.Repeat("detail line\n", 5))
+	// errorMessage returns FinishReasonError; combine with thinking
+	// content so all three sections are exercised.
+	msg.Parts = append([]message.ContentPart{
+		message.ReasoningContent{
+			Thinking:   "Considering options.",
+			StartedAt:  testStartedAt,
+			FinishedAt: testFinishedAt,
+		},
+	}, msg.Parts...)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	_ = item.RawRender(77)
+	first := snapshot(item)
+	require.NotEmpty(t, first.thinking)
+	require.NotEmpty(t, first.content)
+	require.NotEmpty(t, first.errSec)
+
+	_ = item.RawRender(117)
+	second := snapshot(item)
+
+	require.NotEqual(t, first.thinking, second.thinking, "resize must re-render the thinking section")
+	require.NotEqual(t, first.content, second.content, "resize must re-render the content section")
+	require.NotEqual(t, first.errSec, second.errSec, "resize must re-render the error section")
+}
+
+// TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent guards
+// that the error section caches independently. Editing the error
+// message must not invalidate the other two sections, and editing the
+// content must not invalidate the error section.
+func TestAssistantSectionCache_ErrorIndependentOfThinkingAndContent(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	build := func(thinking, content, errMsg, errDetails string) *message.Message {
+		return &message.Message{
+			ID:   "a6",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   thinking,
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: content},
+				message.Finish{
+					Reason:  message.FinishReasonError,
+					Message: errMsg,
+					Details: errDetails,
+					Time:    testFinishTime,
+				},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("think", "content", "boom", "details")).(*AssistantMessageItem)
+	_ = item.RawRender(79)
+	first := snapshot(item)
+
+	// Change only the error text. Thinking and content caches must
+	// survive; error cache must miss and re-render.
+	item.SetMessage(build("think", "content", "different boom", "different details"))
+	_ = item.RawRender(79)
+	second := snapshot(item)
+
+	require.Equal(t, first.thinking, second.thinking, "error change must not invalidate thinking")
+	require.Equal(t, first.content, second.content, "error change must not invalidate content")
+	require.NotEqual(t, first.errSec, second.errSec, "error change must re-render the error section")
+
+	// Now change only the content; error cache must survive.
+	item.SetMessage(build("think", "different content", "different boom", "different details"))
+	_ = item.RawRender(79)
+	third := snapshot(item)
+
+	require.Equal(t, second.thinking, third.thinking)
+	require.NotEqual(t, second.content, third.content)
+	require.Equal(t, second.errSec, third.errSec, "content change must not invalidate the error section")
+}
+
+// TestAssistantSectionCache_PrefixCacheRespectsSectionChanges guards
+// the F3/F4 boundary: the prefix cache must invalidate when any
+// underlying section changes. We verify by comparing the F3-cached
+// Render output across SetMessage cycles.
+func TestAssistantSectionCache_PrefixCacheRespectsSectionChanges(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	build := func(content string) *message.Message {
+		return &message.Message{
+			ID:   "a7",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: content},
+				message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("first content")).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	const width = 81
+	first := item.Render(width)
+
+	item.SetMessage(build("second content"))
+	second := item.Render(width)
+	require.NotEqual(t, first, second,
+		"prefix cache must invalidate when the content section changes")
+
+	// Re-set to the original content; the prefix cache should
+	// produce identical output again.
+	item.SetMessage(build("first content"))
+	third := item.Render(width)
+	require.Equal(t, first, third)
+}
+
+// TestAssistantSectionCache_ByteIdenticalToFreshRender asserts that the
+// F4 cached path produces the same bytes as a fresh-instance render of
+// the equivalent message — i.e. caching is invisible from the outside.
+// Drives a sequence of mutations (thinking change, content change,
+// finish) and compares every step against an independent item rendered
+// from scratch.
+func TestAssistantSectionCache_ByteIdenticalToFreshRender(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 83
+
+	type step struct {
+		name string
+		msg  *message.Message
+	}
+	startedAt := testStartedAt
+	finishedAt := testFinishedAt
+	finishTime := testFinishTime
+	steps := []step{
+		{
+			name: "thinking-only",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning", StartedAt: startedAt},
+				},
+			},
+		},
+		{
+			name: "thinking-grew",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt},
+				},
+			},
+		},
+		{
+			name: "content-arrived",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt},
+					message.TextContent{Text: "the answer"},
+				},
+			},
+		},
+		{
+			name: "finished-end-turn",
+			msg: &message.Message{
+				ID: "iso", Role: message.Assistant,
+				Parts: []message.ContentPart{
+					message.ReasoningContent{Thinking: "first reasoning more", StartedAt: startedAt, FinishedAt: finishedAt},
+					message.TextContent{Text: "the answer"},
+					message.Finish{Reason: message.FinishReasonEndTurn, Time: finishTime},
+				},
+			},
+		},
+	}
+
+	first := steps[0].msg.Clone()
+	cached := NewAssistantMessageItem(&sty, &first).(*AssistantMessageItem)
+	for _, s := range steps {
+		cached.SetMessage(s.msg)
+		freshMsg := s.msg.Clone()
+		fresh := NewAssistantMessageItem(&sty, &freshMsg).(*AssistantMessageItem)
+		require.Equal(t, fresh.RawRender(width), cached.RawRender(width),
+			"step %q: cached path must match fresh render byte-for-byte", s.name)
+	}
+}
+
+// TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange
+// guards the F3 prefix cache against composition-only changes:
+// flipping the finish reason from EndTurn to Canceled appends a
+// constant "Canceled" line via renderMessageContent, but no
+// section's own source text changes. The prefix cache must observe
+// the difference (compositionKey is folded into prefixCacheKey) and
+// the resulting bytes must differ. As a second guarantee, a fresh
+// item built with the same final state must produce byte-equal
+// output to the cached item — caching must never produce stale or
+// divergent renders.
+func TestAssistantSectionCache_PrefixCacheInvalidatesOnCompositionOnlyChange(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 87
+
+	build := func(reason message.FinishReason) *message.Message {
+		return &message.Message{
+			ID:   "comp",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.TextContent{Text: "hi"},
+				message.Finish{Reason: reason, Time: testFinishTime},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build(message.FinishReasonEndTurn)).(*AssistantMessageItem)
+	endTurnOut := item.Render(width)
+
+	// Flip only the finish reason. Thinking is empty and content
+	// text is unchanged, so no section's source hash moves; only
+	// compositionKey shifts. The prefix cache must miss.
+	item.SetMessage(build(message.FinishReasonCanceled))
+	canceledOut := item.Render(width)
+	require.NotEqual(t, endTurnOut, canceledOut,
+		"prefix cache must invalidate on composition-only change (finish reason)")
+
+	// A fresh item built with the same final state must match the
+	// cached item byte-for-byte — caching is invisible from the
+	// outside and never serves stale output.
+	fresh := NewAssistantMessageItem(&sty, build(message.FinishReasonCanceled)).(*AssistantMessageItem)
+	require.Equal(t, fresh.Render(width), canceledOut,
+		"cached output must equal a fresh render of the same final state")
+}
+
+// TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit guards
+// click-detection geometry across thinking-section cache hits. The
+// thinking box height feeds HandleMouseClick; it is recomputed
+// inside renderThinking and must be restored from
+// assistantSection.aux when the thinking cache hits. We render once
+// to capture the original height, trigger a content-only change so
+// thinkingKey stays identical (thinking text, expanded flag, and
+// footer state all unchanged), render again, and assert the
+// thinkingBoxHeight field is preserved.
+func TestAssistantSectionCache_ThinkingBoxHeightSurvivesCacheHit(t *testing.T) {
+	sty := styles.CharmtonePantera()
+	const width = 71
+
+	thinking := strings.Join([]string{
+		"Considering the request.",
+		"Looking at the relevant files.",
+		"Drafting a plan.",
+		"Verifying constraints.",
+	}, "\n")
+	msg := thinkingMessage("hbox", thinking, "initial answer")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.thinkingViewMode = thinkingFullExpanded
+
+	_ = item.RawRender(width)
+	originalHeight := item.thinkingBoxHeight
+	require.Greater(t, originalHeight, 0,
+		"thinking box height must be populated after first render")
+
+	// Stomp the field so a stale read (cache hit that fails to
+	// restore aux) is detectable. Then trigger a content-only
+	// change: thinkingKey is byte-identical between renders, so
+	// the thinking section cache must hit and restore the
+	// preserved height via assistantSection.aux.
+	item.thinkingBoxHeight = -1
+	updated := thinkingMessage("hbox", thinking, "initial answer with more streamed text")
+	item.SetMessage(updated)
+	_ = item.RawRender(width)
+
+	require.Equal(t, originalHeight, item.thinkingBoxHeight,
+		"thinkingBoxHeight must be preserved across thinking section cache hits "+
+			"so HandleMouseClick keeps targeting the right rows")
+}

internal/ui/chat/assistant_test.go 🔗

@@ -10,21 +10,108 @@ import (
 )
 
 // TestAssistantMessageItemExpandable guards the Expandable contract on
-// AssistantMessageItem. The earlier implementation returned no value, which
-// meant the type silently did not satisfy chat.Expandable and the
-// keyboard-driven expand path in model/chat.go skipped thinking blocks.
+// AssistantMessageItem along the keyboard-driven expand path. The earlier
+// implementation returned no value, which meant the type silently did
+// not satisfy chat.Expandable and the keyboard-driven expand path in
+// model/chat.go skipped thinking blocks.
+//
+// We exercise the contract through the bare Expandable interface (the
+// same dispatch site model.Chat.ToggleExpandedSelectedItem uses), which
+// proves both that AssistantMessageItem still satisfies the interface
+// and that the bool return reports the right semantic state at every
+// point in the cycle.
 func TestAssistantMessageItemExpandable(t *testing.T) {
 	t.Parallel()
 
 	sty := styles.CharmtonePantera()
-	msg := &message.Message{ID: "m1", Role: message.Assistant}
-	item := NewAssistantMessageItem(&sty, msg)
+	// Short thinking: under the tail-window cap, so the cycle is
+	// collapsed -> full -> collapsed (tail-window is skipped).
+	msg := thinkingMessage("m1", "step one\nstep two\nstep three", "")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
 
-	exp, ok := item.(Expandable)
+	exp, ok := any(item).(Expandable)
 	require.True(t, ok, "AssistantMessageItem must satisfy Expandable")
 
-	require.True(t, exp.ToggleExpanded(), "first toggle should report expanded")
-	require.False(t, exp.ToggleExpanded(), "second toggle should report collapsed")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode,
+		"new items must start in the collapsed view-mode")
+	require.True(t, exp.ToggleExpanded(),
+		"first toggle of a non-empty thinking block must report expanded")
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode,
+		"short blocks must skip tail-window and land in full expansion")
+	require.False(t, exp.ToggleExpanded(),
+		"second toggle must report collapsed (cycle closed)")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+}
+
+// TestAssistantMessageItemExpandableEmptyThinkingNoOp guards the B2
+// fix: a message with no thinking text must treat ToggleExpanded as a
+// no-op. Mutating the view mode in that case would thrash the
+// thinking-section cache key for no visible benefit and would surprise
+// the caller (model.Chat.ToggleExpandedSelectedItem would treat a
+// "now collapsed" return as a real state change and re-scroll on it).
+func TestAssistantMessageItemExpandableEmptyThinkingNoOp(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := &message.Message{ID: "m1-empty", Role: message.Assistant}
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	exp, ok := any(item).(Expandable)
+	require.True(t, ok, "AssistantMessageItem must satisfy Expandable")
+
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+	require.False(t, exp.ToggleExpanded(),
+		"empty thinking must report current (collapsed) state without flipping")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode,
+		"empty-thinking toggle must not mutate thinkingViewMode")
+
+	// Whitespace-only thinking is still effectively empty.
+	item.message.Parts = []message.ContentPart{
+		message.ReasoningContent{Thinking: "  \n\n\t  ", StartedAt: testStartedAt},
+	}
+	require.False(t, exp.ToggleExpanded())
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+}
+
+// TestAssistantMessageItemTailWindowBoundary guards the B1 fix: the
+// tail-window heuristic must compare logical line counts (1 +
+// newline count) against the cap, not raw newline counts. A source
+// whose logical line count exactly equals the cap must NOT trip the
+// tail-window step (full render still fits cleanly under the cap),
+// while one logical line over the cap must trip it.
+func TestAssistantMessageItemTailWindowBoundary(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+
+	atCap := buildLines(maxExpandedThinkingTailLines)
+	overCap := buildLines(maxExpandedThinkingTailLines + 1)
+
+	atItem := NewAssistantMessageItem(&sty, thinkingMessage("at-cap", atCap, "")).(*AssistantMessageItem)
+	require.False(t, atItem.tailWindowWouldTruncate(),
+		"a source with exactly N logical lines must not trip the tail-window step")
+
+	overItem := NewAssistantMessageItem(&sty, thinkingMessage("over-cap", overCap, "")).(*AssistantMessageItem)
+	require.True(t, overItem.tailWindowWouldTruncate(),
+		"a source with N+1 logical lines must trip the tail-window step")
+}
+
+// buildLines returns a string of n logical lines (n-1 newlines). Each
+// line is a unique short token so callers can distinguish head from
+// tail in rendered output if they need to.
+func buildLines(n int) string {
+	if n <= 0 {
+		return ""
+	}
+	var b []byte
+	for i := 1; i <= n; i++ {
+		if i > 1 {
+			b = append(b, '\n')
+		}
+		b = append(b, 'l', 'n')
+		b = append(b, []byte(itoa(i))...)
+	}
+	return string(b)
 }
 
 // TestAssistantMessageItemHandleMouseClick ensures HandleMouseClick does not
@@ -40,15 +127,16 @@ func TestAssistantMessageItemHandleMouseClick(t *testing.T) {
 	item.thinkingBoxHeight = 5
 
 	// Click inside the thinking box signals handled but must not mutate
-	// the expanded state.
+	// the view-mode state.
 	require.True(t, item.HandleMouseClick(ansi.MouseLeft, 0, 2))
-	require.False(t, item.thinkingExpanded, "HandleMouseClick must not toggle expansion on its own")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode,
+		"HandleMouseClick must not toggle expansion on its own")
 
 	// Click outside the thinking box is ignored entirely.
 	require.False(t, item.HandleMouseClick(ansi.MouseLeft, 0, 10))
-	require.False(t, item.thinkingExpanded)
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
 
 	// Non-left button is ignored.
 	require.False(t, item.HandleMouseClick(ansi.MouseRight, 0, 2))
-	require.False(t, item.thinkingExpanded)
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
 }

internal/ui/chat/assistant_thinking_window_test.go 🔗

@@ -0,0 +1,500 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"charm.land/lipgloss/v2"
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/charmbracelet/x/ansi"
+	"github.com/stretchr/testify/require"
+)
+
+// thinkingMessageWithLines builds a still-thinking assistant message
+// whose reasoning content is `count` short paragraphs separated by
+// blank lines. The blank-line separation is what matters: glamour
+// renders paragraph blocks one-per-line in the output (with a
+// trailing blank line between paragraphs) instead of reflowing the
+// entire input into one big wrapped paragraph. That gives us a
+// post-glamour line count we can drive past the tail-window
+// threshold deterministically. Each paragraph is tagged with its
+// (1-based) index so the test can identify head vs tail in the
+// rendered output.
+//
+// The message has no text content and no Finish part, so
+// IsThinking() returns true and the render path skips the
+// "Thought for" footer — keeping the rendered height computation
+// simple.
+func thinkingMessageWithLines(id string, count int) *message.Message {
+	var b strings.Builder
+	for i := 1; i <= count; i++ {
+		b.WriteString("ln")
+		b.WriteString(itoa(i))
+		if i < count {
+			// Blank line between paragraphs: glamour preserves the
+			// per-paragraph structure rather than reflowing into one
+			// wrapped block, so totalLines tracks count predictably.
+			b.WriteString("\n\n")
+		}
+	}
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{
+				Thinking:  b.String(),
+				StartedAt: testStartedAt,
+			},
+		},
+	}
+}
+
+// itoa is a local stdlib-free integer formatter used only by these
+// tests; pulling fmt in just for %d would be wasteful when the test
+// fixtures already churn 5000+ short strings.
+func itoa(n int) string {
+	if n == 0 {
+		return "0"
+	}
+	var buf [20]byte
+	i := len(buf)
+	for n > 0 {
+		i--
+		buf[i] = byte('0' + n%10)
+		n /= 10
+	}
+	return string(buf[i:])
+}
+
+// renderedThinkingHeight returns the line count of the cached
+// thinking section render only (not the full RawRender, which also
+// includes the content and error sections). Drives a render at
+// `width` first to populate the cache.
+func renderedThinkingHeight(t *testing.T, item *AssistantMessageItem, width int) int {
+	t.Helper()
+	_ = item.RawRender(width)
+	require.NotEmpty(t, item.thinkingSec.out,
+		"thinking section must be populated after RawRender")
+	return lipgloss.Height(item.thinkingSec.out)
+}
+
+// TestThinkingWindow_CollapsedCapPreserved guards that F5 did not
+// regress the existing collapsed-mode behaviour: a 5000-line
+// thinking block in the default (collapsed) state still renders at
+// most a small bounded height — the last `maxCollapsedThinkingHeight`
+// lines plus the truncation hint. The thinking message keeps
+// IsThinking() == true, so the optional "Thought for" footer is
+// suppressed and the section height equals the box height.
+func TestThinkingWindow_CollapsedCapPreserved(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := thinkingMessageWithLines("collapsed", 5000)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	// Default state must be collapsed.
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+
+	// Unique odd width avoids sharing the glamour renderer cache with
+	// any other parallel test (the renderer instance is memoized per
+	// width and is not safe for concurrent Render calls).
+	const width = 91
+	height := renderedThinkingHeight(t, item, width)
+
+	// Collapsed mode keeps the existing cap: last 10 lines + a
+	// 2-line hint prefix (hint + blank). Allow a small slack for
+	// any future style-driven padding so the test is robust to
+	// cosmetic tweaks while still being orders of magnitude below
+	// the 5000-line source.
+	const collapsedUpperBound = maxCollapsedThinkingHeight + 5
+	require.LessOrEqual(t, height, collapsedUpperBound,
+		"collapsed mode must remain bounded by the small cap; got %d", height)
+}
+
+// TestThinkingWindow_ExpandedShortSkipsTailWindow guards that a
+// short thinking block (well under the tail-window cap) still
+// toggles directly to full expansion without an intermediate
+// tail-window step and shows no affordance footer. The cycle is
+// collapsed -> full -> collapsed for short blocks; tail-window is
+// only inserted when it would actually elide content.
+func TestThinkingWindow_ExpandedShortSkipsTailWindow(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const lines = 50
+	require.Less(t, lines, maxExpandedThinkingTailLines,
+		"this test relies on the source being well under the tail cap")
+	msg := thinkingMessageWithLines("short", lines)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	require.True(t, item.ToggleExpanded(),
+		"first toggle should report expanded")
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode,
+		"short blocks must skip tail-window and go straight to full expansion")
+
+	const width = 93
+	_ = item.RawRender(width)
+	out := item.thinkingSec.out
+	plain := ansi.Strip(out)
+
+	require.NotContains(t, plain, "earlier lines hidden",
+		"short blocks must not show the tail-window affordance")
+	require.NotContains(t, plain, "lines hidden",
+		"short expanded blocks must not show any truncation hint")
+	require.Contains(t, plain, "ln1 ",
+		"a fully expanded short block must include the very first source paragraph")
+	require.Contains(t, plain, "ln50 ",
+		"a fully expanded short block must include the last source paragraph")
+}
+
+// TestThinkingWindow_TailWindowed asserts the central F5 behaviour:
+// expanding a long thinking block produces a tail window of size
+// `maxExpandedThinkingTailLines` plus the affordance footer, with
+// the LAST source line present (i.e. we tailed, not headed) and
+// earlier lines elided.
+//
+// Beyond presence/absence of sentinels, this test verifies a true
+// `tail -K` relationship between the tail-windowed render and the
+// fully-expanded render of the same source at the same width: the
+// last K plain-ANSI lines of the windowed render must byte-equal
+// the last K lines of the unwindowed render.
+//
+// K is sized below the cap to absorb the affordance prefix (hint +
+// blank line) and any small framing differences introduced by the
+// bordered ThinkingBox. The cap minus 5 leaves a comfortable margin
+// for padding/footer rows while still asserting that the bulk of
+// the rendered tail is identical.
+func TestThinkingWindow_TailWindowed(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 5000
+	const width = 95
+
+	// Tail-windowed render.
+	tailMsg := thinkingMessageWithLines("tail", total)
+	tailItem := NewAssistantMessageItem(&sty, tailMsg).(*AssistantMessageItem)
+	require.True(t, tailItem.ToggleExpanded(), "first toggle should report expanded")
+	require.Equal(t, thinkingTailWindow, tailItem.thinkingViewMode,
+		"a long block must enter tail-window after the first toggle")
+
+	height := renderedThinkingHeight(t, tailItem, width)
+
+	// The visible window is N tail lines plus an affordance line
+	// and a blank-line spacer (matching the existing collapsed-mode
+	// hint structure). Allow a small slack for style-driven
+	// padding.
+	const expectedFloor = maxExpandedThinkingTailLines + 1
+	const expectedCeil = maxExpandedThinkingTailLines + 5
+	require.GreaterOrEqual(t, height, expectedFloor,
+		"tail-window must include at least N + affordance lines; got %d", height)
+	require.LessOrEqual(t, height, expectedCeil,
+		"tail-window must not exceed N + a small padding budget; got %d", height)
+
+	tailPlain := ansi.Strip(tailItem.thinkingSec.out)
+
+	require.Contains(t, tailPlain, "earlier lines hidden",
+		"tail-windowed render must include the affordance footer")
+	require.Contains(t, tailPlain, "ln5000",
+		"tail-windowed render must include the LAST source paragraph — we tailed, not headed")
+	require.NotContains(t, tailPlain, "ln1 ",
+		"tail-windowed render must elide early source paragraphs")
+
+	// Independent reference render: same source, same width, full
+	// expansion (no tail slice). The tail-windowed output's last K
+	// lines must byte-equal the unwindowed output's last K lines.
+	fullMsg := thinkingMessageWithLines("tail-full-ref", total)
+	fullItem := NewAssistantMessageItem(&sty, fullMsg).(*AssistantMessageItem)
+	fullItem.thinkingViewMode = thinkingFullExpanded
+	_ = fullItem.RawRender(width)
+	fullPlain := ansi.Strip(fullItem.thinkingSec.out)
+
+	tailLines := strings.Split(tailPlain, "\n")
+	fullLines := strings.Split(fullPlain, "\n")
+
+	// K is the cap minus a small budget that covers the affordance
+	// prefix (hint line + blank line) and any framing differences
+	// the bordered ThinkingBox style may introduce around the
+	// edges. Documented inline because going much larger lets the
+	// affordance row leak into the comparison; going much smaller
+	// dilutes the assertion.
+	const K = maxExpandedThinkingTailLines - 5
+	require.GreaterOrEqual(t, len(tailLines), K,
+		"tail render must contain at least K lines; got %d", len(tailLines))
+	require.GreaterOrEqual(t, len(fullLines), K,
+		"full render must contain at least K lines; got %d", len(fullLines))
+
+	tailTail := tailLines[len(tailLines)-K:]
+	fullTail := fullLines[len(fullLines)-K:]
+	require.Equal(t, fullTail, tailTail,
+		"tail-windowed render's last %d lines must byte-equal the unwindowed render's last %d lines (true tail -K relationship)",
+		K, K)
+}
+
+// TestThinkingWindow_PromoteToFull verifies the cycle continues from
+// tail-window to full expansion: the second toggle drops the
+// affordance, removes the tail slice, and produces a render that
+// matches a fresh item rendered directly in the full-expanded
+// state.
+func TestThinkingWindow_PromoteToFull(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 1500
+	msg := thinkingMessageWithLines("promote", total)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 97
+
+	require.True(t, item.ToggleExpanded())
+	require.Equal(t, thinkingTailWindow, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	tailOut := item.thinkingSec.out
+	require.Contains(t, ansi.Strip(tailOut), "earlier lines hidden")
+
+	require.True(t, item.ToggleExpanded(), "second toggle stays expanded (full)")
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	fullOut := item.thinkingSec.out
+	fullPlain := ansi.Strip(fullOut)
+
+	require.NotContains(t, fullPlain, "earlier lines hidden",
+		"full expansion must drop the tail-window affordance")
+	require.Contains(t, fullPlain, "ln1 ",
+		"full expansion must include the first source paragraph")
+	require.Contains(t, fullPlain, "ln1500 ",
+		"full expansion must include the last source paragraph")
+
+	// Independent reference: a fresh item, rendered straight into
+	// the full-expanded state, must produce byte-equal output.
+	freshMsg := thinkingMessageWithLines("promote-fresh", total)
+	fresh := NewAssistantMessageItem(&sty, freshMsg).(*AssistantMessageItem)
+	fresh.thinkingViewMode = thinkingFullExpanded
+	_ = fresh.RawRender(width)
+	require.Equal(t, fresh.thinkingSec.out, fullOut,
+		"cached full-expanded output must match a fresh full-expanded render")
+
+	// And the cycle closes back to collapsed.
+	require.False(t, item.ToggleExpanded(), "third toggle must report collapsed")
+	require.Equal(t, thinkingCollapsed, item.thinkingViewMode)
+}
+
+// sectionKey is the tuple that defines a cache-hit identity for an
+// assistantSection: (width, srcHash, extra). Comparing this tuple
+// across mutations is a stronger invariant than byte-equality of
+// rendered output: byte-equality could in principle hold even if
+// the cache invalidated and re-rendered identical bytes, while
+// tuple-equality proves the lookup key never moved.
+type sectionKey struct {
+	width   int
+	srcHash uint64
+	extra   uint64
+}
+
+func keyOf(s assistantSection) sectionKey {
+	return sectionKey{width: s.width, srcHash: s.srcHash, extra: s.extra}
+}
+
+// TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow
+// guards the F4/F5 boundary: streaming the main content while the
+// thinking block sits in tail-window mode must NOT invalidate the
+// thinking section cache. Tail-window state is folded into
+// thinkingKey()'s extra hash, so changing only the content text
+// keeps thinking's (srcHash, extra) tuple identical and the cache
+// hits.
+//
+// The assertion is on the cache key tuple, not just rendered bytes:
+// equal output could in principle survive a re-render with
+// identical inputs, but identical (width, srcHash, extra) tuples
+// across the SetMessage cycle prove the thinking cache was never
+// invalidated to begin with. The mirror tuple on the content
+// section MUST move (the source text changed), or the test isn't
+// exercising what it claims to.
+func TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 1000
+
+	build := func(content string) *message.Message {
+		var b strings.Builder
+		for i := 1; i <= total; i++ {
+			b.WriteString("ln")
+			b.WriteString(itoa(i))
+			if i < total {
+				b.WriteString("\n\n")
+			}
+		}
+		parts := []message.ContentPart{
+			message.ReasoningContent{
+				Thinking:   b.String(),
+				StartedAt:  testStartedAt,
+				FinishedAt: testFinishedAt,
+			},
+		}
+		if content != "" {
+			parts = append(parts, message.TextContent{Text: content})
+		}
+		return &message.Message{ID: "tail-stream", Role: message.Assistant, Parts: parts}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("first answer")).(*AssistantMessageItem)
+	item.thinkingViewMode = thinkingTailWindow
+
+	const width = 99
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	firstThinkingKey := keyOf(item.thinkingSec)
+	firstContentKey := keyOf(item.contentSec)
+	require.NotEmpty(t, first.thinking)
+
+	item.SetMessage(build("first answer with more streaming text"))
+	_ = item.RawRender(width)
+	second := snapshot(item)
+	secondThinkingKey := keyOf(item.thinkingSec)
+	secondContentKey := keyOf(item.contentSec)
+
+	require.Equal(t, firstThinkingKey, secondThinkingKey,
+		"thinking section's (width, srcHash, extra) tuple must not move "+
+			"across a content-only update — proves the cache key never invalidated")
+	require.Equal(t, first.thinking, second.thinking,
+		"content streaming must not invalidate the tail-windowed thinking cache")
+	require.NotEqual(t, firstContentKey, secondContentKey,
+		"content section's tuple MUST move; otherwise this test isn't exercising a real content change")
+	require.NotEqual(t, first.content, second.content,
+		"content section must have re-rendered")
+}
+
+// TestThinkingWindow_ToggleInvalidatesOnlyThinking verifies that
+// cycling thinkingViewMode invalidates the thinking section cache
+// alone — content and error caches survive across the toggle.
+//
+// Like TestThinkingWindow_ContentChangeKeepsThinkingCacheInTailWindow,
+// the assertion is on the cache key tuple (width, srcHash, extra)
+// at each section, not just on rendered bytes:
+//   - thinking's tuple MUST move (extra folds in thinkingViewMode)
+//   - content's and error's tuples MUST NOT move (their keys depend
+//     only on their own source text, untouched by the toggle).
+func TestThinkingWindow_ToggleInvalidatesOnlyThinking(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 1500
+	build := func() *message.Message {
+		var b strings.Builder
+		for i := 1; i <= total; i++ {
+			b.WriteString("ln")
+			b.WriteString(itoa(i))
+			if i < total {
+				b.WriteString("\n\n")
+			}
+		}
+		return &message.Message{
+			ID:   "toggle-iso",
+			Role: message.Assistant,
+			Parts: []message.ContentPart{
+				message.ReasoningContent{
+					Thinking:   b.String(),
+					StartedAt:  testStartedAt,
+					FinishedAt: testFinishedAt,
+				},
+				message.TextContent{Text: "answer text"},
+				message.Finish{
+					Reason:  message.FinishReasonError,
+					Message: "boom",
+					Details: "details",
+					Time:    testFinishTime,
+				},
+			},
+		}
+	}
+
+	item := NewAssistantMessageItem(&sty, build()).(*AssistantMessageItem)
+
+	const width = 101
+	_ = item.RawRender(width)
+	first := snapshot(item)
+	firstThink := keyOf(item.thinkingSec)
+	firstContent := keyOf(item.contentSec)
+	firstErr := keyOf(item.errorSec)
+	require.NotEmpty(t, first.thinking)
+	require.NotEmpty(t, first.content)
+	require.NotEmpty(t, first.errSec)
+
+	// Cycle: collapsed -> tail-window. Only thinking should change.
+	require.True(t, item.ToggleExpanded())
+	require.Equal(t, thinkingTailWindow, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	second := snapshot(item)
+	secondThink := keyOf(item.thinkingSec)
+	secondContent := keyOf(item.contentSec)
+	secondErr := keyOf(item.errorSec)
+
+	require.NotEqual(t, firstThink, secondThink,
+		"thinking section's tuple MUST move on toggle (extra folds in thinkingViewMode)")
+	require.Equal(t, firstContent, secondContent,
+		"content section's tuple must not move on a thinking toggle")
+	require.Equal(t, firstErr, secondErr,
+		"error section's tuple must not move on a thinking toggle")
+	require.NotEqual(t, first.thinking, second.thinking,
+		"toggling into tail-window must re-render the thinking section")
+	require.Equal(t, first.content, second.content,
+		"toggling thinking view-mode must not invalidate the content section")
+	require.Equal(t, first.errSec, second.errSec,
+		"toggling thinking view-mode must not invalidate the error section")
+
+	// Cycle: tail-window -> full. Same expectation.
+	require.True(t, item.ToggleExpanded())
+	require.Equal(t, thinkingFullExpanded, item.thinkingViewMode)
+	_ = item.RawRender(width)
+	third := snapshot(item)
+	thirdThink := keyOf(item.thinkingSec)
+	thirdContent := keyOf(item.contentSec)
+	thirdErr := keyOf(item.errorSec)
+
+	require.NotEqual(t, secondThink, thirdThink,
+		"thinking section's tuple MUST move on the second toggle as well")
+	require.Equal(t, secondContent, thirdContent,
+		"content section's tuple must remain stable across the second toggle")
+	require.Equal(t, secondErr, thirdErr,
+		"error section's tuple must remain stable across the second toggle")
+	require.NotEqual(t, second.thinking, third.thinking,
+		"toggling into full expansion must re-render the thinking section")
+	require.Equal(t, second.content, third.content)
+	require.Equal(t, second.errSec, third.errSec)
+}
+
+// TestThinkingWindow_BoxHeightTracksWindow asserts that
+// thinkingBoxHeight reflects the WINDOWED render's height in
+// tail-window mode, not the (much larger) full thinking height.
+// This is what HandleMouseClick uses to detect whether a click
+// landed on the thinking box, so getting it wrong would make
+// click detection extend off the bottom of the visible box.
+func TestThinkingWindow_BoxHeightTracksWindow(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	const total = 5000
+	msg := thinkingMessageWithLines("box-height", total)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 103
+
+	// Tail-window: height should be roughly the cap.
+	item.thinkingViewMode = thinkingTailWindow
+	_ = item.RawRender(width)
+	tailHeight := item.thinkingBoxHeight
+	require.Greater(t, tailHeight, 0)
+	require.LessOrEqual(t, tailHeight, maxExpandedThinkingTailLines+5,
+		"tail-window box height must reflect the windowed render, not the full thinking height; got %d",
+		tailHeight)
+
+	// Full expansion: height should grow well past the tail cap.
+	item.thinkingViewMode = thinkingFullExpanded
+	_ = item.RawRender(width)
+	fullHeight := item.thinkingBoxHeight
+	require.Greater(t, fullHeight, maxExpandedThinkingTailLines*2,
+		"full expansion box height must reflect the full thinking render; got %d",
+		fullHeight)
+}

internal/ui/chat/incremental_glamour_test.go 🔗

@@ -0,0 +1,853 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+
+	"charm.land/glamour/v2"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// newTestRenderer builds a fresh glamour renderer for the given
+// width. We deliberately do NOT share renderers between calls in
+// the equivalence tests so any hidden state in
+// [glamour.TermRenderer] cannot leak from a "cached" rendering
+// path into a "fresh" rendering path.
+func newTestRenderer(t *testing.T, width int) *glamour.TermRenderer {
+	t.Helper()
+	sty := styles.CharmtonePantera()
+	r, err := glamour.NewTermRenderer(
+		glamour.WithStyles(sty.Markdown),
+		glamour.WithWordWrap(width),
+	)
+	require.NoError(t, err)
+	return r
+}
+
+// freshRender renders content as a single document with a fresh
+// glamour renderer and applies the same trailing-newline trim
+// that streamingMarkdown.Render does. Use this for byte- and
+// visible-equivalence comparisons against the streaming path.
+func freshRender(t *testing.T, content string, width int) string {
+	t.Helper()
+	r := newTestRenderer(t, width)
+	out, err := r.Render(content)
+	require.NoError(t, err)
+	return strings.TrimSuffix(out, "\n")
+}
+
+// stripANSI removes all ANSI CSI escape sequences from s so two
+// renders with different colour state can be compared on their
+// visible glyphs alone.
+func stripANSI(s string) string {
+	var b strings.Builder
+	b.Grow(len(s))
+	i := 0
+	for i < len(s) {
+		if s[i] == 0x1b && i+1 < len(s) && s[i+1] == '[' {
+			j := i + 2
+			for j < len(s) {
+				c := s[j]
+				if c >= 0x40 && c <= 0x7e {
+					j++
+					break
+				}
+				j++
+			}
+			i = j
+			continue
+		}
+		b.WriteByte(s[i])
+		i++
+	}
+	return b.String()
+}
+
+// normalizeRender canonicalises a rendered glamour string for
+// visual-equivalence comparison: strip ANSI, drop per-line
+// trailing whitespace, drop leading/trailing blank lines, and
+// collapse consecutive blank lines to a single blank line.
+//
+// Glamour pads rendered lines with trailing spaces and adds top/
+// bottom block margins that differ subtly between "render the
+// whole document at once" and "render two halves and concatenate
+// them." Per F8 design principle D, those byte-level differences
+// are acceptable as long as the visible content matches; this
+// helper makes that comparison explicit.
+func normalizeRender(s string) string {
+	clean := stripANSI(s)
+	lines := strings.Split(clean, "\n")
+	for i, l := range lines {
+		lines[i] = strings.TrimRight(l, " \t")
+	}
+	// Collapse consecutive blank lines.
+	out := make([]string, 0, len(lines))
+	prevBlank := false
+	for _, l := range lines {
+		blank := l == ""
+		if blank && prevBlank {
+			continue
+		}
+		out = append(out, l)
+		prevBlank = blank
+	}
+	// Trim leading and trailing blanks.
+	for len(out) > 0 && out[0] == "" {
+		out = out[1:]
+	}
+	for len(out) > 0 && out[len(out)-1] == "" {
+		out = out[:len(out)-1]
+	}
+	return strings.Join(out, "\n")
+}
+
+// containsRawMarkdownSource reports whether the visible portion of
+// rendered contains literal markdown source markers that should
+// have been consumed by glamour. Used by T2 to assert that
+// intermediate streaming flushes don't leak raw source through to
+// the user. We deliberately only flag markers that glamour
+// removes during rendering ("```" fence delimiters, "|" table
+// pipes embedded in a line that also contains pipes — actual
+// table syntax — and bare "###" headers); pipes-in-prose and
+// dashes are too common to flag.
+func containsRawMarkdownSource(rendered string) bool {
+	clean := stripANSI(rendered)
+	if strings.Contains(clean, "```") {
+		return true
+	}
+	for _, line := range strings.Split(clean, "\n") {
+		if strings.HasPrefix(strings.TrimLeft(line, " \t"), "###") {
+			return true
+		}
+	}
+	return false
+}
+
+// -----------------------------------------------------------------------
+// T1: findSafeMarkdownBoundary unit tests.
+// -----------------------------------------------------------------------
+
+// TestFindSafeMarkdownBoundary_TableDriven exercises the
+// findSafeMarkdownBoundary decision tree across the full set of
+// constructs §4.4 calls out: plain paragraphs, fenced code (open
+// and closed), lists, tables, block quotes, and setext headers.
+func TestFindSafeMarkdownBoundary_TableDriven(t *testing.T) {
+	t.Parallel()
+
+	cases := []struct {
+		name    string
+		content string
+		// want is the expected boundary; -1 means "no safe
+		// boundary." When >=0 the test asserts content[:want]
+		// ends after a blank-line separator and content[:want]
+		// is a complete prefix.
+		want int
+	}{
+		{
+			name:    "empty",
+			content: "",
+			want:    -1,
+		},
+		{
+			name:    "single line",
+			content: "Just a single paragraph",
+			want:    -1,
+		},
+		{
+			name:    "two paragraphs",
+			content: "First paragraph.\n\nSecond paragraph.",
+			// boundary at start of "Second"
+			want: len("First paragraph.\n\n"),
+		},
+		{
+			name:    "three paragraphs picks latest",
+			content: "First.\n\nSecond.\n\nThird.",
+			want:    len("First.\n\nSecond.\n\n"),
+		},
+		{
+			name:    "open fence at end",
+			content: "Para.\n\n```go\nfoo()\n",
+			// no closing fence — every blank-line candidate
+			// before content end is INSIDE the fence (the open
+			// fence opened at offset 7). Actually the ONLY
+			// blank line is between "Para." and "```go", so
+			// candidate boundary is right before "```go". At
+			// that point fence count = 0, even, but the line
+			// AFTER (the first non-blank) is "```go" which
+			// would change rendering of the prefix… hmm,
+			// actually it wouldn't change the prefix's
+			// rendering because the prefix is just "Para.\n\n".
+			// The boundary would be ACCEPTED. Let's check
+			// what our impl does.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "inside open fence: no candidate after open",
+			content: "Para.\n\n```go\nfoo()\n\nbar()\n",
+			// blank line after "foo()" is INSIDE the fence
+			// (fence count at that prefix = 1, odd), must
+			// reject. The earlier blank line between "Para."
+			// and "```go" should still be safe (fence count
+			// at that prefix = 0).
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "closed fence followed by paragraph",
+			content: "Para1.\n\n```\nfoo()\n```\n\nPara2.",
+			// latest blank line is between "```" and "Para2.";
+			// fence count at that prefix = 2 (even), last
+			// non-blank line is "```" which is not a list/
+			// table/quote/setext.
+			want: len("Para1.\n\n```\nfoo()\n```\n\n"),
+		},
+		{
+			name:    "open list at end",
+			content: "Para.\n\n- one\n- two\n",
+			// last non-blank line of any blank-bounded prefix
+			// is a list item; our boundary check rejects.
+			// The blank line between "Para." and "- one" is
+			// the only candidate, but the line AFTER (first
+			// non-blank of suffix) is "- one" — that's fine,
+			// a list opening doesn't change the prefix's
+			// rendering. So the boundary BEFORE the list is
+			// accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "list interior: no boundary",
+			content: "- one\n- two\n",
+			// no blank line at all.
+			want: -1,
+		},
+		{
+			name:    "closed list then paragraph",
+			content: "- one\n- two\n\nPara.",
+			// blank line after the list. Last non-blank line
+			// of prefix is "- two" — a list item — so the
+			// candidate is REJECTED. (Conservative: we don't
+			// know the list is "closed" without looking at
+			// what follows.)
+			want: -1,
+		},
+		{
+			name:    "table at end",
+			content: "Para.\n\n| a | b |\n| --- | --- |\n| 1 | 2 |\n",
+			// blank-line candidate is between "Para." and
+			// table opener. Last non-blank line of prefix is
+			// "Para." — fine. Line AFTER is "| a | b |"
+			// which is a table line; doesn't retroactively
+			// change "Para." Boundary accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "table interior with internal blank line: no late boundary",
+			content: "| a | b |\n| --- | --- |\n\n| 1 | 2 |\n",
+			// the blank line in the middle is followed by
+			// another table line. Last non-blank line of
+			// prefix is "| --- | --- |" which contains a
+			// pipe — we reject.
+			want: -1,
+		},
+		{
+			name:    "block quote at end",
+			content: "Para.\n\n> quoted\n> still quoted\n",
+			// Last non-blank line of any prefix that ends
+			// inside the quote block is a "> ..." line —
+			// rejected. The blank line BEFORE the quote
+			// gives a prefix of "Para.\n\n" — last non-blank
+			// "Para." — accepted.
+			want: len("Para.\n\n"),
+		},
+		{
+			name:    "setext underline pending",
+			content: "Heading\n\n=====\n",
+			// blank line between "Heading" and "=====".
+			// Prefix = "Heading\n\n", last non-blank "Heading"
+			// — fine. But the FIRST non-blank line of the
+			// suffix is "=====", a setext-underline
+			// candidate. Splitting here would render the
+			// prefix as a paragraph "Heading", but the
+			// canonical render would treat the whole thing
+			// as a setext header. Reject.
+			//
+			// (Note: per CommonMark, a blank line between a
+			// paragraph and an underline actually breaks the
+			// setext, so the setext interpretation may not
+			// apply. But the boundary check is conservative
+			// — being wrong costs one slow frame, being
+			// over-aggressive costs visible breakage.)
+			want: -1,
+		},
+		{
+			name:    "indented code at end of prefix",
+			content: "Para.\n\n    code line\n\nNext.",
+			// prefix candidates:
+			//   "Para.\n\n" — last non-blank "Para.", accepted
+			//   "Para.\n\n    code line\n\n" — last non-blank
+			//   is "    code line" which is indented 4
+			//   spaces — REJECTED.
+			// Latest accepted is the first.
+			want: len("Para.\n\n"),
+		},
+	}
+
+	for _, c := range cases {
+		t.Run(c.name, func(t *testing.T) {
+			t.Parallel()
+			got := findSafeMarkdownBoundary(c.content)
+			require.Equalf(t, c.want, got,
+				"findSafeMarkdownBoundary(%q) = %d, want %d", c.content, got, c.want)
+			if got > 0 {
+				// Boundary must point to the start of a line
+				// (i.e. just after a newline) when the prefix
+				// is non-empty.
+				require.True(t, got <= len(c.content),
+					"boundary %d out of range (len=%d)", got, len(c.content))
+				if got > 0 && got <= len(c.content) {
+					require.Equal(t, byte('\n'), c.content[got-1],
+						"boundary %d does not sit immediately after a newline", got)
+				}
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T2: streaming-equivalence tests.
+// -----------------------------------------------------------------------
+
+// streamingScenarios returns the four canonical document shapes
+// that exercise different boundary-detection paths.
+func streamingScenarios() []struct {
+	name string
+	doc  string
+} {
+	return []struct {
+		name string
+		doc  string
+	}{
+		{
+			name: "plain-paragraphs",
+			doc: strings.Join([]string{
+				"This is the first paragraph of the document.",
+				"",
+				"Here is the second paragraph; it has some words.",
+				"",
+				"And a third paragraph for good measure.",
+				"",
+				"Finally a fourth paragraph to push past one boundary.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-fence",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"Some explanatory prose before the code.",
+				"",
+				"```go",
+				"func hello() {",
+				"\tfmt.Println(\"hi\")",
+				"}",
+				"```",
+				"",
+				"And a closing paragraph after the code block.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-list",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"- list item one",
+				"- list item two",
+				"- list item three",
+				"",
+				"Trailing paragraph.",
+			}, "\n"),
+		},
+		{
+			name: "paragraphs-with-table",
+			doc: strings.Join([]string{
+				"Intro paragraph.",
+				"",
+				"| col a | col b |",
+				"| ----- | ----- |",
+				"| 1     | 2     |",
+				"| 3     | 4     |",
+				"",
+				"Trailing paragraph after the table.",
+			}, "\n"),
+		},
+	}
+}
+
+// progressivePrefixes splits doc into n monotonically growing
+// byte prefixes, ending with the full document. n>=1.
+func progressivePrefixes(doc string, n int) []string {
+	if n < 1 {
+		n = 1
+	}
+	out := make([]string, 0, n)
+	for i := 1; i <= n; i++ {
+		// integer scaling so the last entry is exactly len(doc)
+		size := len(doc) * i / n
+		if i == n {
+			size = len(doc)
+		}
+		out = append(out, doc[:size])
+	}
+	return out
+}
+
+// TestStreamingMarkdown_FinalVisuallyEquivalent drives a sequence
+// of progressive prefixes through streamingMarkdown and asserts
+// the FINAL output is visually equivalent (per design principle
+// D) to a fresh full-document render. Strict byte-equality is
+// not the bar — see the comment in normalizeRender for why.
+func TestStreamingMarkdown_FinalVisuallyEquivalent(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	const steps = 15
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			renderer := newTestRenderer(t, width)
+			var sm streamingMarkdown
+			prefixes := progressivePrefixes(sc.doc, steps)
+
+			var lastOut string
+			for _, p := range prefixes {
+				lastOut = sm.Render(p, width, renderer)
+			}
+
+			fresh := freshRender(t, sc.doc, width)
+			require.Equal(t, normalizeRender(fresh), normalizeRender(lastOut),
+				"final streaming output must match a fresh full render visually")
+		})
+	}
+}
+
+// TestStreamingMarkdown_IntermediateOutputsPlausible asserts that
+// every intermediate flush returns a non-empty string and does
+// not leak raw markdown source through to the user. This is the
+// "visually plausible" half of T2.
+func TestStreamingMarkdown_IntermediateOutputsPlausible(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	const steps = 12
+
+	for _, sc := range streamingScenarios() {
+		t.Run(sc.name, func(t *testing.T) {
+			t.Parallel()
+			renderer := newTestRenderer(t, width)
+			var sm streamingMarkdown
+
+			for i, p := range progressivePrefixes(sc.doc, steps) {
+				if p == "" {
+					continue
+				}
+				out := sm.Render(p, width, renderer)
+				require.NotEmptyf(t, out, "step %d: empty render for prefix len %d", i, len(p))
+				require.Falsef(t, containsRawMarkdownSource(out),
+					"step %d: render leaked raw markdown source.\nprefix=%q\nout=%s",
+					i, p, normalizeRender(out))
+			}
+		})
+	}
+}
+
+// -----------------------------------------------------------------------
+// T3: cache invalidation tests.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_WidthChangeInvalidates asserts that a
+// width change blows away the cached prefix so the next render
+// is keyed against the new width. We can't observe the cache
+// directly without reaching into the struct, so we assert the
+// observable contract: after a width change, the rendered output
+// reflects the new width AND the streamingMarkdown's internal
+// cache fields are reset to the new state.
+func TestStreamingMarkdown_WidthChangeInvalidates(t *testing.T) {
+	t.Parallel()
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	r80 := newTestRenderer(t, 80)
+	r40 := newTestRenderer(t, 40)
+	var sm streamingMarkdown
+
+	out80 := sm.Render(doc, 80, r80)
+	require.Equal(t, 80, sm.width, "width must be cached after first render")
+	cachedPrefix := sm.stablePrefix
+
+	out40 := sm.Render(doc, 40, r40)
+	require.Equal(t, 40, sm.width, "width change must update cached width")
+	require.NotEqual(t, out80, out40,
+		"different widths must produce different rendered output")
+	// stablePrefix may legitimately have re-advanced after the
+	// reset (tryAdvanceFromEmpty), but if it has, it can no
+	// longer carry the OLD width's render. We assert the cache
+	// reset by checking that the cached prefix length is at
+	// most the current content length.
+	require.True(t, len(sm.stablePrefix) <= len(doc),
+		"stable prefix must be a prefix of the current content")
+	_ = cachedPrefix
+}
+
+// TestStreamingMarkdown_NonPrefixContentInvalidates verifies
+// that content which is NOT a prefix-extension of the cached
+// stable prefix triggers a Reset and a fresh render path. This
+// guards the "user retried the turn" case.
+func TestStreamingMarkdown_NonPrefixContentInvalidates(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	// Drive a streaming sequence so the cache picks up a stable
+	// prefix.
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	for _, p := range progressivePrefixes(doc, 6) {
+		_ = sm.Render(p, width, r)
+	}
+	require.NotEmpty(t, sm.stablePrefix,
+		"stable prefix must be populated after streaming a multi-paragraph doc")
+
+	// Now switch to entirely different content (user retried).
+	other := "Completely different opening paragraph.\n\nAnd a second."
+	out := sm.Render(other, width, r)
+	require.NotEmpty(t, out)
+	// stablePrefix must be a prefix of `other`, i.e. cache was
+	// reset off the OLD content.
+	require.True(t, strings.HasPrefix(other, sm.stablePrefix),
+		"stable prefix must be reset to a prefix of the new content")
+
+	// Visual equivalence to a fresh render of `other`.
+	fresh := freshRender(t, other, width)
+	require.Equal(t, normalizeRender(fresh), normalizeRender(out),
+		"render after non-prefix content change must match a fresh render")
+}
+
+// TestStreamingMarkdown_ResetClearsCache asserts Reset() drops
+// every cached field; the next render is necessarily a full
+// render path.
+func TestStreamingMarkdown_ResetClearsCache(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	_ = sm.Render(doc, width, r)
+	// The sample doc has safe boundaries so the cache should
+	// have advanced. If for some reason it didn't, we still
+	// want Reset to be a no-op-safe operation; assert the
+	// post-Reset state directly.
+	sm.Reset()
+	require.Equal(t, 0, sm.width)
+	require.Equal(t, "", sm.stablePrefix)
+	require.Equal(t, "", sm.stablePrefixRender)
+
+	// Next render must be a full render path. Drive one step
+	// and verify the output matches a fresh full render.
+	out := sm.Render(doc, width, r)
+	fresh := freshRender(t, doc, width)
+	require.Equal(t, normalizeRender(fresh), normalizeRender(out))
+}
+
+// -----------------------------------------------------------------------
+// T4: fallback safety.
+// -----------------------------------------------------------------------
+
+// TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders covers
+// the "one giant table being built character by character" case.
+// Every flush must fall back to a full render; the cache must
+// not advance into an unsafe state. We compare each flush to a
+// fresh full render of the same prefix; bytes must match for
+// each prefix individually.
+//
+// (Byte equality is sound here because no concatenation happens:
+// the streaming path delegates straight to renderer.Render when
+// the cache is empty and no safe boundary exists.)
+func TestStreamingMarkdown_NoSafeBoundaryAlwaysFullRenders(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+
+	// One growing table — no blank lines anywhere, so no
+	// boundary candidate is ever found.
+	doc := strings.Join([]string{
+		"| col a | col b | col c |",
+		"| ----- | ----- | ----- |",
+		"| 1     | 2     | 3     |",
+		"| 4     | 5     | 6     |",
+		"| 7     | 8     | 9     |",
+		"| 10    | 11    | 12    |",
+		"| 13    | 14    | 15    |",
+		"| 16    | 17    | 18    |",
+		"| 19    | 20    | 21    |",
+		"| 22    | 23    | 24    |",
+	}, "\n")
+	require.Equal(t, -1, findSafeMarkdownBoundary(doc),
+		"sanity check: no blank lines, no safe boundary")
+
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	prefixes := progressivePrefixes(doc, 10)
+	for i, p := range prefixes {
+		if p == "" {
+			continue
+		}
+		out := sm.Render(p, width, r)
+		fresh := freshRender(t, p, width)
+		require.Equalf(t, fresh, out,
+			"step %d (len=%d): streaming output must byte-equal a fresh render when boundary detection fails",
+			i, len(p))
+	}
+	// Cache must remain empty: no boundary was ever found, no
+	// width change occurred, no advance ever cached anything.
+	require.Equal(t, "", sm.stablePrefix,
+		"stable prefix must remain empty when no safe boundary ever exists")
+}
+
+// TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash is the
+// minimum-viability assertion of T4: even when boundary
+// detection fails on every flush the streaming path must not
+// crash and must produce non-empty output for non-empty input.
+func TestStreamingMarkdown_NoSafeBoundaryDoesNotCrash(t *testing.T) {
+	t.Parallel()
+
+	const width = 80
+	r := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	// A deeply-pathological input: a single line that grows
+	// one character at a time. There is never a blank-line
+	// separator so the cache is never advanced.
+	src := "The quick brown fox jumps over the lazy dog."
+	for i := 1; i <= len(src); i++ {
+		out := sm.Render(src[:i], width, r)
+		require.NotEmpty(t, out, "streaming output must not be empty for non-empty input")
+	}
+}
+
+// -----------------------------------------------------------------------
+// Integration assertions on the wired-in path.
+// -----------------------------------------------------------------------
+
+// -----------------------------------------------------------------------
+// T5 / T6 / T7: anywhere-in-prefix hazards (B1 / B2 / B3 from the
+// F8 round-2 review). For each hazard we drive every progressive
+// prefix of a document that exercises the hazard through the cache
+// and assert two contracts:
+//
+//  1. The cached stable prefix never contains the hazard. If the
+//     hazard line is at byte offset H, then after every flush
+//     len(sm.stablePrefix) <= H. This is the "no silent
+//     corruption" half — the algorithm cannot accept a boundary
+//     that splits across the hazard.
+//
+//  2. The final flush is visually equivalent to a fresh full
+//     render of the complete document. This is the same T2-style
+//     equivalence assertion ported to the new doc shapes.
+// -----------------------------------------------------------------------
+
+// nonBlankLines returns the non-blank visible lines of s with
+// per-line trailing whitespace trimmed. Used to compare two
+// rendered fragments for content equivalence when paragraph-
+// margin behaviour legitimately differs between a single fresh
+// render and a streaming split render (per F8 design principle D
+// — visual equivalence is the bar, byte-equivalence is not).
+//
+// Some glamour block types (notably HTML blocks and reference
+// link definitions) interact with adjacent paragraph blocks
+// during a single render — adjacency effectively suppresses the
+// blank-line margin between blocks. When the streaming path
+// renders the prefix and trail in separate calls, the seam is
+// re-introduced as a blank line. The visible TEXT is identical;
+// only the inter-block margin differs.
+func nonBlankLines(s string) []string {
+	clean := stripANSI(s)
+	out := make([]string, 0)
+	for _, l := range strings.Split(clean, "\n") {
+		l = strings.TrimRight(l, " \t")
+		if strings.TrimSpace(l) == "" {
+			continue
+		}
+		out = append(out, l)
+	}
+	return out
+}
+
+// runProgressiveBoundaryRespectTest is the shared body of T5/T6/T7.
+// It accepts a document and the byte offset of the line whose
+// PRESENCE in the prefix must trigger the hazard reject; the
+// cached stable prefix may never extend past hazardLineOffset.
+//
+// The final-output equivalence check is content-based (non-blank
+// lines compared) rather than full-normalization: see
+// nonBlankLines for the reason.
+func runProgressiveBoundaryRespectTest(t *testing.T, doc string, hazardLineOffset int) {
+	t.Helper()
+	const width = 80
+	const steps = 25
+
+	renderer := newTestRenderer(t, width)
+	var sm streamingMarkdown
+
+	prefixes := progressivePrefixes(doc, steps)
+	var lastOut string
+	for i, p := range prefixes {
+		if p == "" {
+			continue
+		}
+		lastOut = sm.Render(p, width, renderer)
+		require.NotEmptyf(t, lastOut, "step %d: empty render", i)
+		require.LessOrEqualf(t, len(sm.stablePrefix), hazardLineOffset,
+			"step %d: cached stable prefix advanced past the hazard line\n"+
+				"prefix len=%d, hazard at %d, sm.stablePrefix=%q",
+			i, len(sm.stablePrefix), hazardLineOffset, sm.stablePrefix)
+	}
+
+	fresh := freshRender(t, doc, width)
+	require.Equal(t, nonBlankLines(fresh), nonBlankLines(lastOut),
+		"final streaming output must contain the same non-blank lines as a fresh full render")
+}
+
+// TestStreamingMarkdown_LooseListContinuation locks in the B1 fix.
+// A loose list followed by a continuation paragraph and then a
+// trailing paragraph creates a candidate boundary between the list
+// item and its continuation; the trailing non-blank line of that
+// candidate prefix is the continuation paragraph (not a list
+// marker), so the line-only check would accept it. The
+// anywhere-in-prefix list-marker check rejects it.
+func TestStreamingMarkdown_LooseListContinuation(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"- item one",
+		"",
+		"  continuation paragraph still belongs to item one",
+		"",
+		"- item two",
+		"",
+		"Trailing paragraph after the list.",
+	}, "\n")
+
+	// The first list marker line begins after "Intro paragraph.\n\n".
+	// The cached stable prefix may include that boundary (BEFORE
+	// the list opens) but must never advance into the list.
+	hazardOffset := strings.Index(doc, "- item one")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlock locks in the B2 fix. A raw HTML
+// block followed by a paragraph creates a candidate boundary
+// between the closed HTML block and the trailing paragraph. The
+// anywhere-in-prefix HTML-opener check rejects any boundary that
+// would include the HTML block in the stable prefix.
+func TestStreamingMarkdown_HTMLBlock(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"<div>",
+		"some block content",
+		"</div>",
+		"",
+		"Trailing paragraph after the HTML block.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "<div>")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_HTMLBlockType7 covers HTML block type 7
+// (CommonMark): a generic open/close tag whose name is NOT in the
+// fixed type-6 set still opens an HTML block and must forfeit any
+// boundary that would split the block off from following content.
+func TestStreamingMarkdown_HTMLBlockType7(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"<custom-tag>",
+		"some block content",
+		"</custom-tag>",
+		"",
+		"Trailing paragraph after the custom-tag block.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "<custom-tag>")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestStreamingMarkdown_LinkRefDefinition locks in the B3 fix. A
+// reference link definition followed by a paragraph that uses the
+// reference creates a boundary candidate between the def and the
+// paragraph; rendering them in separate glamour passes loses the
+// definition. The anywhere-in-prefix ref-def check rejects.
+func TestStreamingMarkdown_LinkRefDefinition(t *testing.T) {
+	t.Parallel()
+
+	doc := strings.Join([]string{
+		"Intro paragraph.",
+		"",
+		"[ref]: http://example.com",
+		"",
+		"Trailing paragraph that links to [the example][ref] inline.",
+	}, "\n")
+
+	hazardOffset := strings.Index(doc, "[ref]:")
+	require.Greater(t, hazardOffset, 0, "test setup")
+
+	runProgressiveBoundaryRespectTest(t, doc, hazardOffset)
+}
+
+// TestAssistantStreamingContent_ResetOnClearCache guards the
+// integration contract that ClearItemCaches (style change) drops
+// the streaming-markdown cache. Without this, a style change
+// would leave the OLD style's ANSI sequences embedded in the
+// stable-prefix render and the next flush would visually mix
+// styles.
+func TestAssistantStreamingContent_ResetOnClearCache(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	doc := "Para one.\n\nPara two.\n\nPara three."
+	msg := finishedAssistantMessage("stream-clear", doc)
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 80
+	_ = item.RawRender(width)
+	// Drive a second message that extends the content so the
+	// streaming cache has a chance to advance (if it would).
+	doc2 := doc + "\n\nFour."
+	item.SetMessage(finishedAssistantMessage("stream-clear", doc2))
+	_ = item.RawRender(width)
+
+	// Now wipe the caches the way ClearItemCaches does.
+	item.clearCache()
+
+	require.Equal(t, "", item.streamingContent.stablePrefix,
+		"clearCache must Reset the streaming-markdown cache")
+	require.Equal(t, "", item.streamingContent.stablePrefixRender)
+	require.Equal(t, 0, item.streamingContent.width)
+}

internal/ui/chat/messages.go 🔗

@@ -75,6 +75,12 @@ type SendMsg struct {
 }
 
 type highlightableMessageItem struct {
+	// version is the parent item's version counter. SetHighlight
+	// bumps it on every observable change so the F6 list memo and
+	// any frozen entry get invalidated when a selection drag enters
+	// or leaves the item.
+	version *list.Versioned
+
 	startLine   int
 	startCol    int
 	endLine     int
@@ -103,13 +109,20 @@ func (h *highlightableMessageItem) SetHighlight(startLine int, startCol int, end
 	// Adjust columns for the style's left inset (border + padding) since we
 	// highlight the content only.
 	offset := MessageLeftPaddingTotal
+	newStartCol := max(0, startCol-offset)
+	newEndCol := endCol
+	if endCol >= 0 {
+		newEndCol = max(0, endCol-offset)
+	}
+	if h.startLine == startLine && h.startCol == newStartCol && h.endLine == endLine && h.endCol == newEndCol {
+		return
+	}
 	h.startLine = startLine
-	h.startCol = max(0, startCol-offset)
+	h.startCol = newStartCol
 	h.endLine = endLine
-	if endCol >= 0 {
-		h.endCol = max(0, endCol-offset)
-	} else {
-		h.endCol = endCol
+	h.endCol = newEndCol
+	if h.version != nil {
+		h.version.Bump()
 	}
 }
 
@@ -118,8 +131,9 @@ func (h *highlightableMessageItem) Highlight() (startLine int, startCol int, end
 	return h.startLine, h.startCol, h.endLine, h.endCol
 }
 
-func defaultHighlighter(sty *styles.Styles) *highlightableMessageItem {
+func defaultHighlighter(sty *styles.Styles, v *list.Versioned) *highlightableMessageItem {
 	return &highlightableMessageItem{
+		version:     v,
 		startLine:   -1,
 		startCol:    -1,
 		endLine:     -1,
@@ -135,12 +149,17 @@ type cacheClearable interface {
 }
 
 // ClearItemCaches drops any cached rendered output on each item so the
-// next render uses the current styles.
+// next render uses the current styles. It also bumps each item's
+// version so the F6 list-level memo invalidates frozen entries on
+// the next render.
 func ClearItemCaches(items []MessageItem) {
 	for _, item := range items {
 		if cc, ok := item.(cacheClearable); ok {
 			cc.clearCache()
 		}
+		if v, ok := item.(interface{ Bump() }); ok {
+			v.Bump()
+		}
 	}
 }
 
@@ -156,6 +175,20 @@ type cachedMessageItem struct {
 	// width and height are the dimensions of the cached render
 	width  int
 	height int
+
+	// prefixedRendered caches the per-line-prefixed Render output (the
+	// result of splitting RawRender by newlines and prepending a focus
+	// or selection prefix to every line). Items rebuild this every
+	// frame today; caching it keyed by (prefixedWidth, prefixedKey)
+	// turns Render into a pointer return when item state is stable.
+	//
+	// Invalidation lives in clearCache; callers must additionally
+	// bypass this cache whenever the prefixed output would not be
+	// stable (spinner ticks, active highlight ranges) by not calling
+	// setCachedPrefixedRender for those frames.
+	prefixedRendered string
+	prefixedWidth    int
+	prefixedKey      uint64
 }
 
 // getCachedRender returns the cached render if it exists for the given width.
@@ -173,21 +206,57 @@ func (c *cachedMessageItem) setCachedRender(rendered string, width, height int)
 	c.height = height
 }
 
+// getCachedPrefixedRender returns the cached prefixed render if it exists
+// for the given (width, key). The key encodes any state that changes the
+// per-line prefix (focused/blurred, compact, ...).
+func (c *cachedMessageItem) getCachedPrefixedRender(width int, key uint64) (string, bool) {
+	if c.prefixedRendered != "" && c.prefixedWidth == width && c.prefixedKey == key {
+		return c.prefixedRendered, true
+	}
+	return "", false
+}
+
+// setCachedPrefixedRender stores the cached prefixed render.
+func (c *cachedMessageItem) setCachedPrefixedRender(rendered string, width int, key uint64) {
+	c.prefixedRendered = rendered
+	c.prefixedWidth = width
+	c.prefixedKey = key
+}
+
 // clearCache clears the cached render.
 func (c *cachedMessageItem) clearCache() {
 	c.rendered = ""
 	c.width = 0
 	c.height = 0
+	c.prefixedRendered = ""
+	c.prefixedWidth = 0
+	c.prefixedKey = 0
 }
 
 // focusableMessageItem is a base struct for message items that can be focused.
 type focusableMessageItem struct {
+	// version is the parent item's version counter. SetFocused
+	// bumps it whenever focus actually flips so the F6 list memo
+	// invalidates the per-line focus prefix.
+	version *list.Versioned
 	focused bool
 }
 
+// newFocusableMessageItem returns a focusableMessageItem wired to the
+// shared version counter.
+func newFocusableMessageItem(v *list.Versioned) *focusableMessageItem {
+	return &focusableMessageItem{version: v}
+}
+
 // SetFocused implements MessageItem.
 func (f *focusableMessageItem) SetFocused(focused bool) {
+	if f.focused == focused {
+		return
+	}
 	f.focused = focused
+	if f.version != nil {
+		f.version.Bump()
+	}
 }
 
 // AssistantInfoID returns a stable ID for assistant info items.
@@ -197,6 +266,7 @@ func AssistantInfoID(messageID string) string {
 
 // AssistantInfoItem renders model info and response time after assistant completes.
 type AssistantInfoItem struct {
+	*list.Versioned
 	*cachedMessageItem
 
 	id                  string
@@ -209,6 +279,7 @@ type AssistantInfoItem struct {
 // NewAssistantInfoItem creates a new AssistantInfoItem.
 func NewAssistantInfoItem(sty *styles.Styles, message *message.Message, cfg *config.Config, lastUserMessageTime time.Time) MessageItem {
 	return &AssistantInfoItem{
+		Versioned:           list.NewVersioned(),
 		cachedMessageItem:   &cachedMessageItem{},
 		id:                  AssistantInfoID(message.ID),
 		message:             message,
@@ -218,6 +289,13 @@ func NewAssistantInfoItem(sty *styles.Styles, message *message.Message, cfg *con
 	}
 }
 
+// Finished implements list.Item. Assistant info blocks render a fixed
+// model/duration footer once the assistant turn finishes; the data
+// is immutable after construction so the entry is safe to freeze.
+func (a *AssistantInfoItem) Finished() bool {
+	return true
+}
+
 // ID implements MessageItem.
 func (a *AssistantInfoItem) ID() string {
 	return a.id
@@ -237,12 +315,20 @@ func (a *AssistantInfoItem) RawRender(width int) string {
 
 // Render implements MessageItem.
 func (a *AssistantInfoItem) Render(width int) string {
+	// AssistantInfoItem uses a single, state-independent prefix; key 0
+	// is sufficient. The cache is invalidated whenever the underlying
+	// cachedMessageItem render is cleared.
+	if cached, ok := a.getCachedPrefixedRender(width, 0); ok {
+		return cached
+	}
 	prefix := a.sty.Messages.SectionHeader.Render()
 	lines := strings.Split(a.RawRender(width), "\n")
 	for i, line := range lines {
 		lines[i] = prefix + line
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	a.setCachedPrefixedRender(out, width, 0)
+	return out
 }
 
 func (a *AssistantInfoItem) renderContent(width int) string {

internal/ui/chat/prefix_cache_test.go 🔗

@@ -0,0 +1,196 @@
+package chat
+
+import (
+	"strings"
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/attachments"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// finishedAssistantMessage builds an assistant message with text content and a
+// finish part so AssistantMessageItem.isSpinning returns false and the
+// prefix cache is exercised.
+func finishedAssistantMessage(id, text string) *message.Message {
+	return &message.Message{
+		ID:   id,
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: text},
+			message.Finish{Reason: message.FinishReasonEndTurn, Time: time.Now().Unix()},
+		},
+	}
+}
+
+// TestAssistantMessageItemRender_PrefixCacheFocusBlur covers the F3 invariant
+// that focus → blur → focus produces the correct prefix every time and never
+// leaks the previous focus state out of the cache.
+func TestAssistantMessageItemRender_PrefixCacheFocusBlur(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m1", "Hello world from the cache test.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+
+	const width = 60
+
+	item.SetFocused(true)
+	focused1 := item.Render(width)
+	focused2 := item.Render(width)
+	require.Equal(t, focused1, focused2, "second render must hit the cache and match the first")
+
+	item.SetFocused(false)
+	blurred1 := item.Render(width)
+	require.NotEqual(t, focused1, blurred1, "blur must produce a different prefixed render than focus")
+
+	item.SetFocused(true)
+	focused3 := item.Render(width)
+	require.Equal(t, focused1, focused3, "re-focus must produce identical output to the original focused render")
+}
+
+// TestAssistantMessageItemRender_PrefixCacheWidthInvalidates asserts that a
+// width change does not return the previous width's cached output.
+func TestAssistantMessageItemRender_PrefixCacheWidthInvalidates(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m2", "Some content that wraps differently at different widths so the rendered output diverges.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	narrow := item.Render(40)
+	wide := item.Render(100)
+	require.NotEqual(t, narrow, wide, "different widths must produce different rendered output")
+
+	narrowAgain := item.Render(40)
+	require.Equal(t, narrow, narrowAgain, "returning to the original width must hit (or repopulate) the cache with the same output")
+}
+
+// TestAssistantMessageItemRender_PrefixCacheHighlightOnTop guarantees that
+// activating a highlight range bypasses the prefix cache so selection drags
+// reflect immediately, and that clearing the highlight returns to the cached
+// prefixed output unchanged.
+func TestAssistantMessageItemRender_PrefixCacheHighlightOnTop(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m3", "Hello world from the highlight test.")
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	const width = 60
+	plain := item.Render(width)
+
+	// Activating a highlight must change the rendered output (selection
+	// painted on top) without poisoning the cache for the un-highlighted
+	// state that follows.
+	item.SetHighlight(0, 0, 0, 5)
+	highlighted := item.Render(width)
+	require.NotEqual(t, plain, highlighted, "active highlight must change Render output")
+
+	// Clear the highlight; the cached un-highlighted prefix render must
+	// be returned unchanged.
+	item.SetHighlight(-1, -1, -1, -1)
+	plainAfter := item.Render(width)
+	require.Equal(t, plain, plainAfter, "clearing the highlight must restore the cached prefixed output exactly")
+}
+
+// TestUserMessageItemRender_PrefixCacheFocusBlur is the user-message
+// counterpart of the assistant focus/blur cache test.
+func TestUserMessageItemRender_PrefixCacheFocusBlur(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := &message.Message{
+		ID:   "u1",
+		Role: message.User,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "Hello from the user."},
+		},
+	}
+	r := attachments.NewRenderer(
+		sty.Attachments.Normal,
+		sty.Attachments.Deleting,
+		sty.Attachments.Image,
+		sty.Attachments.Text,
+	)
+	item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem)
+
+	const width = 60
+
+	item.SetFocused(true)
+	focused1 := item.Render(width)
+	focused2 := item.Render(width)
+	require.Equal(t, focused1, focused2)
+
+	item.SetFocused(false)
+	blurred := item.Render(width)
+	require.NotEqual(t, focused1, blurred)
+
+	item.SetFocused(true)
+	focused3 := item.Render(width)
+	require.Equal(t, focused1, focused3)
+}
+
+// TestCachedMessageItem_PrefixCacheSemantics covers the constant-prefix
+// path used by AssistantInfoItem and the (width, key) keying used by every
+// item, against the underlying cachedMessageItem helper directly. This
+// avoids constructing a full *config.Config with an initialized provider
+// map just to exercise cache plumbing that is identical for all callers.
+func TestCachedMessageItem_PrefixCacheSemantics(t *testing.T) {
+	t.Parallel()
+
+	c := &cachedMessageItem{}
+
+	// Empty cache: miss.
+	_, ok := c.getCachedPrefixedRender(80, 0)
+	require.False(t, ok)
+
+	// Set then hit at the same (width, key).
+	c.setCachedPrefixedRender("hello", 80, 0)
+	got, ok := c.getCachedPrefixedRender(80, 0)
+	require.True(t, ok)
+	require.Equal(t, "hello", got)
+
+	// Different width: miss.
+	_, ok = c.getCachedPrefixedRender(120, 0)
+	require.False(t, ok)
+
+	// Different key (focused vs blurred): miss.
+	_, ok = c.getCachedPrefixedRender(80, 1)
+	require.False(t, ok)
+
+	// clearCache drops the prefixed cache too.
+	c.setCachedRender("raw", 80, 1)
+	c.setCachedPrefixedRender("hello", 80, 0)
+	c.clearCache()
+	_, ok = c.getCachedPrefixedRender(80, 0)
+	require.False(t, ok, "clearCache must drop the prefixed render cache")
+	_, _, ok = c.getCachedRender(80)
+	require.False(t, ok, "clearCache must also drop the raw render cache")
+}
+
+// TestAssistantMessageItemRender_PrefixCacheNoCacheLeak guards against a
+// regression where the cache returned the prefixed output of the previous
+// width. We verify that the cached output for width=W contains the W-sized
+// prefix and not a stale wider one by checking that line lengths are
+// consistent on cache hit.
+func TestAssistantMessageItemRender_PrefixCacheNoCacheLeak(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	msg := finishedAssistantMessage("m4", strings.Repeat("word ", 40))
+	item := NewAssistantMessageItem(&sty, msg).(*AssistantMessageItem)
+	item.SetFocused(true)
+
+	out80 := item.Render(80)
+	out120 := item.Render(120)
+	require.NotEqual(t, out80, out120)
+
+	// Hit each cached entry again and confirm stability.
+	require.Equal(t, out80, item.Render(80))
+	require.Equal(t, out120, item.Render(120))
+}

internal/ui/chat/streaming_markdown.go 🔗

@@ -0,0 +1,740 @@
+package chat
+
+import (
+	"strings"
+
+	"charm.land/glamour/v2"
+	"github.com/charmbracelet/crush/internal/ui/common"
+)
+
+// streamingMarkdown caches a "stable prefix" glamour render so each
+// streaming flush only re-renders the trailing portion of the
+// document. F8 of docs/notes/2026-05-12-chat-rendering-perf.md.
+//
+// The boundary between "stable" and "trailing" is detected by
+// [findSafeMarkdownBoundary]: a position immediately after a blank
+// line at which we can prove no markdown construct is open
+// (fenced code block, list, table, block quote, setext header).
+//
+// Two renders concatenated are NOT generally equal to a single
+// render of the whole document — glamour's wrap state is reset
+// between calls. The boundary check is therefore deliberately
+// conservative; whenever it has the slightest doubt the call
+// falls back to a full render and the cache is left untouched.
+//
+// Invariants:
+//
+//   - stablePrefix is always a literal byte prefix of the most
+//     recently rendered content. If a new content does not have
+//     stablePrefix as its prefix the cache is dropped.
+//   - stablePrefixRender is the glamour render of stablePrefix
+//     alone, with surrounding whitespace trimmed for clean
+//     concatenation.
+//   - width is the glamour wrap width that produced
+//     stablePrefixRender. A width change drops the cache.
+type streamingMarkdown struct {
+	width              int
+	stablePrefix       string
+	stablePrefixRender string
+}
+
+// Reset drops every cached field. After Reset the next Render call
+// is guaranteed to be a full render.
+func (s *streamingMarkdown) Reset() {
+	s.width = 0
+	s.stablePrefix = ""
+	s.stablePrefixRender = ""
+}
+
+// Render returns the glamour render of content at the given width,
+// reusing the cached stable-prefix render when it is safe to do so.
+// On any uncertainty the call falls back to a full render via
+// renderer and leaves the cache untouched (or drops it).
+//
+// The returned string has its trailing newline trimmed to match
+// the existing renderMarkdown contract on AssistantMessageItem.
+//
+// Concurrency: glamour's Render is stateful and not safe for
+// concurrent invocation on a shared renderer. Crush's TUI is
+// single-threaded so production never contends, but parallel
+// callers (most notably the test suite) must serialize. We hold
+// [common.LockMarkdownRenderer] for the entire prefix +
+// trailing render sequence so other goroutines cannot interleave
+// their own Render calls and corrupt goldmark's BlockStack.
+func (s *streamingMarkdown) Render(content string, width int, renderer *glamour.TermRenderer) string {
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
+	defer mu.Unlock()
+	full := func() string {
+		out, err := renderer.Render(content)
+		if err != nil {
+			return content
+		}
+		return strings.TrimSuffix(out, "\n")
+	}
+
+	// Width change OR content not a prefix-extension: drop cache,
+	// full render, optionally try to seed a fresh boundary on this
+	// call (step "f" in the design note).
+	if width != s.width || !strings.HasPrefix(content, s.stablePrefix) {
+		s.Reset()
+		s.width = width
+		out := full()
+		s.tryAdvanceFromEmpty(content, width, renderer)
+		return out
+	}
+
+	boundary := findSafeMarkdownBoundary(content)
+	if boundary < 0 {
+		// No safe boundary anywhere yet. Full render; do not
+		// modify the cache (a future flush may find one).
+		return full()
+	}
+
+	if boundary <= len(s.stablePrefix) {
+		// Cached prefix already covers an at-least-as-late
+		// boundary. Render the trailing partial fresh and glue.
+		trail := content[len(s.stablePrefix):]
+		return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer))
+	}
+
+	// boundary > len(stablePrefix): we have a NEW chunk of safe
+	// content. Render the new chunk, append to stablePrefixRender,
+	// promote the boundary, then render the remaining trail.
+	newChunk := content[len(s.stablePrefix):boundary]
+	newChunkRender := s.renderTrailing(newChunk, renderer)
+	s.stablePrefixRender = glueRenders(s.stablePrefixRender, newChunkRender)
+	s.stablePrefix = content[:boundary]
+
+	trail := content[boundary:]
+	if trail == "" {
+		// boundary == len(content): no trailing content. Returning
+		// the cached prefix render directly is correct.
+		return s.stablePrefixRender
+	}
+	return glueRenders(s.stablePrefixRender, s.renderTrailing(trail, renderer))
+}
+
+// tryAdvanceFromEmpty seeds the cache from a fresh state. We've
+// already paid the cost of a full render of `content`; if there is
+// a safe boundary inside it, render the prefix once more (cheap
+// relative to the full render we just did) and cache it so the
+// next flush can avoid the full work.
+//
+// This is the optional optimisation step "f" from the design
+// note. We render the prefix separately rather than try to
+// recover it from the full render output because two renders
+// concatenated ≠ a single render of the whole, and we prefer the
+// cached prefix render to be byte-for-byte what we'd produce on a
+// future cached call.
+func (s *streamingMarkdown) tryAdvanceFromEmpty(content string, width int, renderer *glamour.TermRenderer) {
+	boundary := findSafeMarkdownBoundary(content)
+	if boundary <= 0 {
+		return
+	}
+	prefix := content[:boundary]
+	out, err := renderer.Render(prefix)
+	if err != nil {
+		return
+	}
+	s.stablePrefix = prefix
+	s.stablePrefixRender = trimGlamourMargins(out)
+	s.width = width
+}
+
+// renderTrailing renders a trailing partial as a fresh glamour
+// document and trims the surrounding whitespace so it can be
+// concatenated to a cached prefix render without doubled blank
+// lines.
+func (s *streamingMarkdown) renderTrailing(text string, renderer *glamour.TermRenderer) string {
+	if text == "" {
+		return ""
+	}
+	out, err := renderer.Render(text)
+	if err != nil {
+		return text
+	}
+	return trimGlamourMargins(out)
+}
+
+// glueRenders concatenates two glamour-rendered fragments with a
+// single blank line separator. Glamour outputs typically carry
+// their own surrounding margins; trimming on both sides and
+// gluing with "\n\n" prevents the visible double-margin seam.
+//
+// Empty fragments are tolerated so the same helper works for the
+// "boundary == len(content)" path where there is no trailing
+// segment.
+func glueRenders(prefix, trail string) string {
+	prefix = trimGlamourMargins(prefix)
+	trail = trimGlamourMargins(trail)
+	switch {
+	case prefix == "" && trail == "":
+		return ""
+	case prefix == "":
+		return trail
+	case trail == "":
+		return prefix
+	default:
+		return prefix + "\n\n" + trail
+	}
+}
+
+// trimGlamourMargins strips leading and trailing whitespace
+// (including newlines) from a glamour-rendered fragment.
+// Glamour adds a leading blank line for documents that open with
+// a heading or paragraph, plus a trailing newline; both must be
+// removed before concatenation.
+func trimGlamourMargins(s string) string {
+	return strings.Trim(s, " \t\n")
+}
+
+// findSafeMarkdownBoundary returns the byte offset of the END of
+// the latest safe boundary in content, i.e. the offset such that
+// content[:boundary] is a valid stable-prefix candidate. The
+// returned offset always points immediately after a blank-line
+// separator, so concatenating a fresh render of content[boundary:]
+// to a cached render of content[:boundary] does not require glamour
+// to share state across the cut.
+//
+// Returns -1 when no safe boundary exists. SAFETY FIRST: any time
+// we have the slightest doubt we return -1 and let the caller fall
+// back to a full render.
+//
+// Decision tree, in order of preference (latest boundary wins):
+//
+//  1. Walk backward through every "blank line" position p such that
+//     content[:p] ends with "\n\n" (or "\n[ \t]*\n").
+//  2. For each candidate, check that content[:p] has an even
+//     number of triple-backtick fence lines (no open fenced
+//     block). Any odd count means we'd be cutting inside a fence
+//     and mis-syntax-highlighting the trailing partial.
+//     2b. Reject if any line in content[:p] (outside fenced blocks)
+//     is a list-marker line, an HTML-block opener, or a link
+//     reference definition. See [prefixHasOpenHazard] for the
+//     reasoning behind these "anywhere in prefix" rejects.
+//  3. Reject if the last non-blank line of content[:p] is:
+//     - a list item marker line ("^\s*([-*+]|\d+\.)\s")
+//     - a table line (contains "|")
+//     - a block quote ("^\s*>")
+//     - a setext header underline ("^=+\s*$" or "^-+\s*$")
+//     - an indented code line (4+ leading spaces or a tab)
+//  4. Reject if the line immediately AFTER the boundary (skipping
+//     leading blank lines) looks like a setext underline (a line
+//     of '=' or '-' only). Rendering the prefix as a paragraph
+//     would change once the underline arrived; that's exactly the
+//     "splitting changes the prefix render" hazard §4.4 calls out.
+//
+// Returns the byte offset of the first character AFTER the blank
+// line, i.e. the start of the trailing segment.
+func findSafeMarkdownBoundary(content string) int {
+	if len(content) == 0 {
+		return -1
+	}
+
+	// Iterate every blank-line position from latest to earliest.
+	for p := blankLineBefore(content, len(content)); p > 0; p = blankLineBefore(content, p-1) {
+		if !isSafeBoundaryAt(content, p) {
+			continue
+		}
+		return p
+	}
+	return -1
+}
+
+// blankLineBefore returns the byte offset of the first character
+// AFTER the latest blank-line separator that ends strictly before
+// `until`. A blank-line separator is a sequence "\n([ \t]*\n)+"
+// — one newline, then one or more lines containing only spaces or
+// tabs and terminated by another newline. The returned offset is
+// the start of the first non-blank line that follows the
+// separator (or the position immediately after the final newline,
+// if no further content remains).
+//
+// Returns -1 when no blank-line separator exists before `until`.
+func blankLineBefore(content string, until int) int {
+	if until <= 0 {
+		return -1
+	}
+	// Walk backward looking for a newline followed (after optional
+	// blank-line content) by another newline. We track the latest
+	// newline we've seen; if the next earlier newline has only
+	// blank chars between them, we have a blank-line separator
+	// and the boundary sits immediately after the latest newline.
+	end := until
+	for end > 0 {
+		nl := strings.LastIndexByte(content[:end], '\n')
+		if nl < 0 {
+			return -1
+		}
+		// Look for an earlier newline whose gap to nl is empty
+		// or whitespace only.
+		prev := strings.LastIndexByte(content[:nl], '\n')
+		for prev >= 0 {
+			gap := content[prev+1 : nl]
+			if isBlankOrSpaces(gap) {
+				return nl + 1
+			}
+			// Gap had non-whitespace; nl is not a blank-line
+			// separator. Move up: try with the earlier newline as
+			// the new "nl" candidate.
+			break
+		}
+		end = nl
+	}
+	return -1
+}
+
+// isBlankOrSpaces reports whether s consists entirely of spaces
+// and tabs (or is empty).
+func isBlankOrSpaces(s string) bool {
+	for i := range len(s) {
+		if s[i] != ' ' && s[i] != '\t' {
+			return false
+		}
+	}
+	return true
+}
+
+// isSafeBoundaryAt reports whether content[:p] is a safe stable
+// prefix. p must be a blank-line boundary (start of a line, with a
+// blank line immediately preceding).
+//
+// Beyond the last-line checks, three "anywhere in the prefix"
+// hazards force a reject because they cannot be reliably reasoned
+// about by inspecting the trailing line alone. For each of these
+// the simplest, safest rule was chosen — see prefixHasOpenHazard.
+func isSafeBoundaryAt(content string, p int) bool {
+	prefix := content[:p]
+
+	// (2) Even number of triple-backtick fence lines.
+	if countFenceLines(prefix)%2 != 0 {
+		return false
+	}
+
+	// (2b) Anywhere-in-prefix hazards: open list (B1), HTML block
+	// opener (B2), reference link definition (B3). Any of these
+	// anywhere in the prefix forces a fallback.
+	if prefixHasOpenHazard(prefix) {
+		return false
+	}
+
+	// (3) Inspect the last non-blank line of the prefix.
+	lastLine := lastNonBlankLine(prefix)
+	if lastLine != "" && lineOpensConstruct(lastLine) {
+		return false
+	}
+
+	// (4) If anything follows, make sure it doesn't look like a
+	// setext underline that would retroactively turn the last
+	// paragraph of the prefix into a header.
+	if rest := content[p:]; rest != "" {
+		first := firstNonBlankLine(rest)
+		if isSetextUnderlineCandidate(first) {
+			return false
+		}
+	}
+
+	return true
+}
+
+// prefixHasOpenHazard reports whether prefix contains any of three
+// constructs that cannot be safely cut at a blank-line boundary
+// even when the immediately preceding line looks fine. Each check
+// uses the SIMPLEST viable conservative rule per the F8 round-2
+// review:
+//
+//	B1 (loose lists). A loose list has a blank line between an item
+//	   and a continuation paragraph that begins with indentation
+//	   but no list marker. If a candidate boundary lands on that
+//	   blank line, the prefix's trailing non-blank line is the
+//	   continuation paragraph, NOT a list marker, so the last-line
+//	   check would accept it even though the list is still open.
+//
+//	   Rule chosen: any list-marker line ANYWHERE in the prefix
+//	   forces -1. This is overly conservative — it forfeits
+//	   boundary advancement past a closed list — but it eliminates
+//	   the entire bug class with zero parsing of CommonMark's
+//	   loose-list closure semantics. We retain the most useful
+//	   boundary in practice: the one BEFORE the list opens (no
+//	   marker has appeared in the prefix yet).
+//
+//	B2 (HTML blocks). CommonMark defines seven HTML-block opener
+//	   patterns (script/pre/style/textarea, comments, processing
+//	   instructions, CDATA, declarations, recognised tag names).
+//	   If the prefix opens an HTML block that the suffix closes,
+//	   splitting renders the prefix as raw HTML and the suffix as
+//	   prose.
+//
+//	   Rule chosen: any HTML-block opener anywhere in the prefix
+//	   forces -1. Same trade-off as B1 — the typical assistant
+//	   output contains no raw HTML, so the perf cost is zero in
+//	   the common case.
+//
+//	B3 (reference link definitions). A line of the form
+//	   "[label]: <url>" defines a link reference that the suffix
+//	   may later use as "[text][label]". Splitting the document
+//	   loses the definition because each half is rendered as an
+//	   independent glamour document.
+//
+//	   Rule chosen: any reference link definition line anywhere in
+//	   the prefix forces -1. Suffix-side reference detection is
+//	   fragile (three syntaxes: [text][label], [label][], [label]),
+//	   so the prefix-side check is the simpler safe choice.
+//
+// All three rules accept the perf hit of "no boundary after a
+// list / HTML block / link def" in exchange for guaranteed
+// soundness. If profiling shows this kills the F8 win on real
+// streaming traces, the next iteration can promote each rule to
+// its less-conservative variant (closure-aware list tracking,
+// per-tag HTML close detection, suffix-aware ref tracking).
+func prefixHasOpenHazard(prefix string) bool {
+	inFence := false
+	for line := range splitLines(prefix) {
+		// Track fenced state so list/html/ref patterns inside a
+		// fenced code block do not falsely trigger the hazards.
+		if isFenceLine(line) {
+			inFence = !inFence
+			continue
+		}
+		if inFence {
+			continue
+		}
+		trimmed := strings.TrimLeft(line, " \t")
+		if trimmed == "" {
+			continue
+		}
+		// B1: any list-item marker.
+		if isListItemMarker(trimmed) {
+			return true
+		}
+		// B2: HTML block opener.
+		if isHTMLBlockOpener(line) {
+			return true
+		}
+		// B3: link reference definition.
+		if isLinkRefDefinition(line) {
+			return true
+		}
+	}
+	return false
+}
+
+// countFenceLines counts lines that begin a fenced code block in
+// the CommonMark sense: a line whose first non-whitespace run is
+// at least three consecutive backticks (or tildes). Each such
+// line toggles the fenced state, so an even count means every
+// opened fence has been closed.
+//
+// We accept up to three leading spaces of indentation (CommonMark
+// rule) and require the fence characters to be the FIRST
+// non-whitespace content of the line. We deliberately do NOT
+// attempt to parse info-strings or differentiate opener from
+// closer beyond toggling — a closing fence is just any line
+// whose first non-whitespace run is >=3 of the same fence char.
+func countFenceLines(s string) int {
+	n := 0
+	for line := range splitLines(s) {
+		if isFenceLine(line) {
+			n++
+		}
+	}
+	return n
+}
+
+// isFenceLine reports whether line opens or closes a fenced code
+// block.
+func isFenceLine(line string) bool {
+	// Strip up to 3 spaces of indentation.
+	i := 0
+	for i < len(line) && i < 3 && line[i] == ' ' {
+		i++
+	}
+	if i >= len(line) {
+		return false
+	}
+	c := line[i]
+	if c != '`' && c != '~' {
+		return false
+	}
+	run := 0
+	for i < len(line) && line[i] == c {
+		i++
+		run++
+	}
+	return run >= 3
+}
+
+// lastNonBlankLine returns the last non-blank line of s, or ""
+// when every line is blank.
+func lastNonBlankLine(s string) string {
+	last := ""
+	for line := range splitLines(s) {
+		if strings.TrimSpace(line) != "" {
+			last = line
+		}
+	}
+	return last
+}
+
+// firstNonBlankLine returns the first non-blank line of s, or ""
+// when every line is blank.
+func firstNonBlankLine(s string) string {
+	for line := range splitLines(s) {
+		if strings.TrimSpace(line) != "" {
+			return line
+		}
+	}
+	return ""
+}
+
+// splitLines yields the lines of s without their terminators. The
+// final segment is yielded even if not newline-terminated.
+func splitLines(s string) func(yield func(string) bool) {
+	return func(yield func(string) bool) {
+		start := 0
+		for i := 0; i < len(s); i++ {
+			if s[i] == '\n' {
+				if !yield(s[start:i]) {
+					return
+				}
+				start = i + 1
+			}
+		}
+		if start <= len(s)-1 {
+			yield(s[start:])
+		}
+	}
+}
+
+// lineOpensConstruct reports whether line keeps a markdown
+// construct open across the boundary. We err conservatively —
+// any case that smells like list/table/quote/setext/indented-code
+// returns true.
+func lineOpensConstruct(line string) bool {
+	// Indented code: a tab, or 4+ leading spaces.
+	if len(line) > 0 && line[0] == '\t' {
+		return true
+	}
+	if strings.HasPrefix(line, "    ") {
+		return true
+	}
+
+	trimmed := strings.TrimLeft(line, " \t")
+	if trimmed == "" {
+		return false
+	}
+
+	// Block quote.
+	if trimmed[0] == '>' {
+		return true
+	}
+
+	// List item: "- " "* " "+ " or "<digits>. " or "<digits>) ".
+	if isListItemMarker(trimmed) {
+		return true
+	}
+
+	// Table: any pipe character anywhere in the line. Conservative:
+	// pipe-in-prose is rare and the cost of bailing is one slow
+	// frame.
+	if strings.ContainsRune(line, '|') {
+		return true
+	}
+
+	// Setext underline candidate as the LAST line of the prefix:
+	// this would be a setext header for an even-earlier paragraph.
+	// Refuse to split at all in this case — the boundary is right
+	// in the middle of a header.
+	if isSetextUnderlineCandidate(trimmed) {
+		return true
+	}
+
+	return false
+}
+
+// isListItemMarker reports whether line (already left-trimmed)
+// starts with a CommonMark list-item marker followed by a space
+// or tab.
+func isListItemMarker(line string) bool {
+	if line == "" {
+		return false
+	}
+	c := line[0]
+	if c == '-' || c == '*' || c == '+' {
+		if len(line) >= 2 && (line[1] == ' ' || line[1] == '\t') {
+			return true
+		}
+		return false
+	}
+	// Ordered list: digits followed by '.' or ')' and a space.
+	i := 0
+	for i < len(line) && line[i] >= '0' && line[i] <= '9' {
+		i++
+	}
+	if i == 0 || i > 9 {
+		return false
+	}
+	if i >= len(line) {
+		return false
+	}
+	if line[i] != '.' && line[i] != ')' {
+		return false
+	}
+	if i+1 >= len(line) {
+		return false
+	}
+	return line[i+1] == ' ' || line[i+1] == '\t'
+}
+
+// isSetextUnderlineCandidate reports whether line (with optional
+// leading whitespace) consists entirely of '=' or entirely of '-'
+// characters with optional trailing whitespace. CommonMark
+// requires no leading whitespace on the underline; we accept up
+// to three spaces for safety so an indented underline still
+// blocks a split.
+func isSetextUnderlineCandidate(line string) bool {
+	// Strip leading whitespace.
+	i := 0
+	for i < len(line) && (line[i] == ' ' || line[i] == '\t') {
+		i++
+	}
+	if i == len(line) {
+		return false
+	}
+	c := line[i]
+	if c != '=' && c != '-' {
+		return false
+	}
+	j := i
+	for j < len(line) && line[j] == c {
+		j++
+	}
+	// Allow trailing whitespace.
+	for j < len(line) {
+		if line[j] != ' ' && line[j] != '\t' {
+			return false
+		}
+		j++
+	}
+	// Need at least one underline character. "-" alone is also a
+	// list marker without a trailing space; the listItem check
+	// covers the marker case before we get here.
+	return j-i >= 1
+}
+
+// isHTMLBlockOpener reports whether line begins one of the seven
+// CommonMark HTML block patterns. We accept up to three spaces of
+// leading indentation (CommonMark rule). Matching is intentionally
+// loose — we only need to know the line "looks like an HTML
+// block start", not parse the contained markup.
+func isHTMLBlockOpener(line string) bool {
+	// Strip up to 3 spaces of indentation.
+	i := 0
+	for i < len(line) && i < 3 && line[i] == ' ' {
+		i++
+	}
+	rest := line[i:]
+	if len(rest) < 2 || rest[0] != '<' {
+		return false
+	}
+
+	// Type 2: HTML comment "<!--".
+	if strings.HasPrefix(rest, "<!--") {
+		return true
+	}
+	// Type 3: processing instruction "<?".
+	if strings.HasPrefix(rest, "<?") {
+		return true
+	}
+	// Type 5: CDATA "<![CDATA[".
+	if strings.HasPrefix(rest, "<![CDATA[") {
+		return true
+	}
+	// Type 4: declaration "<!" followed by an ASCII letter.
+	if len(rest) >= 3 && rest[1] == '!' && isASCIILetter(rest[2]) {
+		return true
+	}
+
+	// Type 1: <script | <pre | <style | <textarea (case-insensitive)
+	// followed by whitespace, '>', end-of-line, or other non-name
+	// terminators. Use a permissive HasPrefix check on lowercase.
+	low := strings.ToLower(rest)
+	for _, t := range []string{"<script", "<pre", "<style", "<textarea"} {
+		if strings.HasPrefix(low, t) {
+			next := byte(0)
+			if len(low) > len(t) {
+				next = low[len(t)]
+			}
+			if next == 0 || next == ' ' || next == '\t' || next == '>' {
+				return true
+			}
+		}
+	}
+
+	// Types 6 & 7: open or close of a block-level tag.
+	//
+	// Type 6 matches a fixed CommonMark tag set; type 7 matches any
+	// otherwise-valid open/close tag whose name is not in the
+	// script/pre/style/textarea family. We collapse both into a
+	// single check: the line must start with '<' or '</' followed
+	// by an ASCII letter. This deliberately mirrors the other
+	// hazards — when in doubt, forfeit the boundary. Lines like
+	// "<3", "<-", "<<", or mid-line "<foo>" do NOT trigger because
+	// we require the line to *start* (after up to 3 spaces) with
+	// '<letter' or '</letter'.
+	j := 1 // past '<'
+	if j < len(rest) && rest[j] == '/' {
+		j++
+	}
+	if j >= len(rest) || !isASCIILetter(rest[j]) {
+		return false
+	}
+	return true
+}
+
+// isASCIILetter reports whether b is an ASCII letter.
+func isASCIILetter(b byte) bool {
+	return (b >= 'a' && b <= 'z') || (b >= 'A' && b <= 'Z')
+}
+
+// isLinkRefDefinition reports whether line matches a CommonMark
+// link reference definition opener. The conservative pattern:
+//
+//	^[ ]{0,3}\[[^\]]+\]:\s*\S+
+//
+// i.e. up to 3 spaces, then a bracketed label (no nested ']'),
+// then a colon, then whitespace, then at least one non-whitespace
+// character of destination. We do not validate the destination —
+// presence of a ref-def opener anywhere in the prefix is enough
+// to forfeit the boundary.
+func isLinkRefDefinition(line string) bool {
+	i := 0
+	for i < len(line) && i < 3 && line[i] == ' ' {
+		i++
+	}
+	if i >= len(line) || line[i] != '[' {
+		return false
+	}
+	i++
+	labelStart := i
+	for i < len(line) && line[i] != ']' {
+		i++
+	}
+	if i >= len(line) || i == labelStart {
+		// No closing bracket, or empty label.
+		return false
+	}
+	// i points at ']'.
+	i++
+	if i >= len(line) || line[i] != ':' {
+		return false
+	}
+	i++
+	// Skip required whitespace.
+	for i < len(line) && (line[i] == ' ' || line[i] == '\t') {
+		i++
+	}
+	// At least one non-whitespace character of destination.
+	return i < len(line)
+}

internal/ui/chat/tools.go 🔗

@@ -19,6 +19,7 @@ import (
 	"github.com/charmbracelet/crush/internal/stringext"
 	"github.com/charmbracelet/crush/internal/ui/anim"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/charmbracelet/x/ansi"
 )
@@ -135,6 +136,7 @@ func (f ToolRendererFunc) RenderTool(sty *styles.Styles, width int, opts *ToolRe
 
 // baseToolMessageItem represents a tool call message that can be displayed in the UI.
 type baseToolMessageItem struct {
+	*list.Versioned
 	*highlightableMessageItem
 	*cachedMessageItem
 	*focusableMessageItem
@@ -176,10 +178,12 @@ func newBaseToolMessageItem(
 		status = ToolStatusCanceled
 	}
 
+	v := list.NewVersioned()
 	t := &baseToolMessageItem{
-		highlightableMessageItem: defaultHighlighter(sty),
+		Versioned:                v,
+		highlightableMessageItem: defaultHighlighter(sty, v),
 		cachedMessageItem:        &cachedMessageItem{},
-		focusableMessageItem:     &focusableMessageItem{},
+		focusableMessageItem:     newFocusableMessageItem(v),
 		sty:                      sty,
 		toolRenderer:             toolRenderer,
 		toolCall:                 toolCall,
@@ -270,8 +274,12 @@ func NewToolMessageItem(
 
 // SetCompact implements the Compactable interface.
 func (t *baseToolMessageItem) SetCompact(compact bool) {
+	if t.isCompact == compact {
+		return
+	}
 	t.isCompact = compact
 	t.clearCache()
+	t.Bump()
 }
 
 // ID returns the unique identifier for this tool message item.
@@ -288,10 +296,22 @@ func (t *baseToolMessageItem) StartAnimation() tea.Cmd {
 }
 
 // Animate progresses the assistant message animation if it should be spinning.
+//
+// Bumps the F6 list-cache version so the next draw re-renders this
+// item: a spinner tick mutates anim's internal frame counter, which
+// changes the rendered output but is invisible to the per-item
+// caches. Without the bump the list cache would serve the previously
+// rendered frame indefinitely and the spinner would appear frozen.
+// The ID gate keeps unrelated ticks (routed here by a future change
+// to chat.Animate's dispatch) from churning the cache.
 func (t *baseToolMessageItem) Animate(msg anim.StepMsg) tea.Cmd {
 	if !t.isSpinning() {
 		return nil
 	}
+	if msg.ID != t.toolCall.ID {
+		return nil
+	}
+	t.Bump()
 	return t.anim.Animate(msg)
 }
 
@@ -332,6 +352,24 @@ func (t *baseToolMessageItem) RawRender(width int) string {
 
 // Render renders the tool message item at the given width.
 func (t *baseToolMessageItem) Render(width int) string {
+	// Cache the prefixed output keyed by (width, prefix variant).
+	// Bypass the cache while spinning (RawRender output is
+	// frame-dependent) or while a highlight range is active.
+	useCache := !t.isSpinning() && !t.isHighlighted()
+	var key uint64
+	switch {
+	case t.isCompact:
+		key = 2
+	case t.focused:
+		key = 1
+	default:
+		key = 0
+	}
+	if useCache {
+		if cached, ok := t.getCachedPrefixedRender(width, key); ok {
+			return cached
+		}
+	}
 	var prefix string
 	if t.isCompact {
 		prefix = t.sty.Messages.ToolCallCompact.Render()
@@ -344,7 +382,11 @@ func (t *baseToolMessageItem) Render(width int) string {
 	for i, ln := range lines {
 		lines[i] = prefix + ln
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	if useCache {
+		t.setCachedPrefixedRender(out, width, key)
+	}
+	return out
 }
 
 // ToolCall returns the tool call associated with this message item.
@@ -356,12 +398,14 @@ func (t *baseToolMessageItem) ToolCall() message.ToolCall {
 func (t *baseToolMessageItem) SetToolCall(tc message.ToolCall) {
 	t.toolCall = tc
 	t.clearCache()
+	t.Bump()
 }
 
 // SetResult sets the tool result associated with this message item.
 func (t *baseToolMessageItem) SetResult(res *message.ToolResult) {
 	t.result = res
 	t.clearCache()
+	t.Bump()
 }
 
 // MessageID returns the ID of the message containing this tool call.
@@ -370,14 +414,20 @@ func (t *baseToolMessageItem) MessageID() string {
 }
 
 // SetMessageID sets the ID of the message containing this tool call.
+// MessageID is metadata only and does not affect the rendered output,
+// so we deliberately do not bump the version here.
 func (t *baseToolMessageItem) SetMessageID(id string) {
 	t.messageID = id
 }
 
 // SetStatus sets the tool status.
 func (t *baseToolMessageItem) SetStatus(status ToolStatus) {
+	if t.status == status {
+		return
+	}
 	t.status = status
 	t.clearCache()
+	t.Bump()
 }
 
 // Status returns the current tool status.
@@ -417,9 +467,25 @@ func (t *baseToolMessageItem) SetSpinningFunc(fn SpinningFunc) {
 func (t *baseToolMessageItem) ToggleExpanded() bool {
 	t.expandedContent = !t.expandedContent
 	t.clearCache()
+	t.Bump()
 	return t.expandedContent
 }
 
+// Finished implements list.Item. A tool call is freezable once the
+// tool call itself is marked finished AND a result has been recorded
+// (or it has been canceled). Tools that override the spinning logic
+// via spinningFunc would short-circuit live ticks; we still gate
+// freezing on isSpinning to keep the contract conservative.
+func (t *baseToolMessageItem) Finished() bool {
+	if t.isSpinning() {
+		return false
+	}
+	if t.status == ToolStatusCanceled {
+		return true
+	}
+	return t.toolCall.Finished && t.result != nil
+}
+
 // HandleMouseClick implements MouseClickable.
 func (t *baseToolMessageItem) HandleMouseClick(btn ansi.MouseButton, x, y int) bool {
 	return btn == ansi.MouseLeft
@@ -974,7 +1040,10 @@ func toolOutputMarkdownContent(sty *styles.Styles, content string, width int, ex
 	}
 
 	renderer := common.QuietMarkdownRenderer(sty, width)
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
 	rendered, err := renderer.Render(content)
+	mu.Unlock()
 	if err != nil {
 		return toolOutputPlainContent(sty, content, width, expanded)
 	}

internal/ui/chat/user.go 🔗

@@ -8,11 +8,13 @@ import (
 	"github.com/charmbracelet/crush/internal/message"
 	"github.com/charmbracelet/crush/internal/ui/attachments"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 )
 
 // UserMessageItem represents a user message in the chat UI.
 type UserMessageItem struct {
+	*list.Versioned
 	*highlightableMessageItem
 	*cachedMessageItem
 	*focusableMessageItem
@@ -24,16 +26,24 @@ type UserMessageItem struct {
 
 // NewUserMessageItem creates a new UserMessageItem.
 func NewUserMessageItem(sty *styles.Styles, message *message.Message, attachments *attachments.Renderer) MessageItem {
+	v := list.NewVersioned()
 	return &UserMessageItem{
-		highlightableMessageItem: defaultHighlighter(sty),
+		Versioned:                v,
+		highlightableMessageItem: defaultHighlighter(sty, v),
 		cachedMessageItem:        &cachedMessageItem{},
-		focusableMessageItem:     &focusableMessageItem{},
+		focusableMessageItem:     newFocusableMessageItem(v),
 		attachments:              attachments,
 		message:                  message,
 		sty:                      sty,
 	}
 }
 
+// Finished implements list.Item. User messages are immutable once
+// submitted, so the entry is always safe to freeze.
+func (m *UserMessageItem) Finished() bool {
+	return true
+}
+
 // RawRender implements [MessageItem].
 func (m *UserMessageItem) RawRender(width int) string {
 	cappedWidth := cappedMessageWidth(width)
@@ -47,7 +57,10 @@ func (m *UserMessageItem) RawRender(width int) string {
 	renderer := common.MarkdownRenderer(m.sty, cappedWidth)
 
 	msgContent := strings.TrimSpace(m.message.Content().Text)
+	mu := common.LockMarkdownRenderer(renderer)
+	mu.Lock()
 	result, err := renderer.Render(msgContent)
+	mu.Unlock()
 	if err != nil {
 		content = msgContent
 	} else {
@@ -70,6 +83,20 @@ func (m *UserMessageItem) RawRender(width int) string {
 
 // Render implements MessageItem.
 func (m *UserMessageItem) Render(width int) string {
+	// Bypass the prefix cache while a highlight range is active so
+	// selection drags reflect immediately without invalidating the
+	// cache. Highlight changes are intentionally applied "above" the
+	// prefix cache.
+	useCache := !m.isHighlighted()
+	var key uint64
+	if m.focused {
+		key = 1
+	}
+	if useCache {
+		if cached, ok := m.getCachedPrefixedRender(width, key); ok {
+			return cached
+		}
+	}
 	var prefix string
 	if m.focused {
 		prefix = m.sty.Messages.UserFocused.Render()
@@ -80,7 +107,11 @@ func (m *UserMessageItem) Render(width int) string {
 	for i, line := range lines {
 		lines[i] = prefix + line
 	}
-	return strings.Join(lines, "\n")
+	out := strings.Join(lines, "\n")
+	if useCache {
+		m.setCachedPrefixedRender(out, width, key)
+	}
+	return out
 }
 
 // ID implements MessageItem.

internal/ui/chat/version_bump_test.go 🔗

@@ -0,0 +1,565 @@
+package chat
+
+import (
+	"testing"
+	"time"
+
+	"github.com/charmbracelet/crush/internal/config"
+	"github.com/charmbracelet/crush/internal/message"
+	"github.com/charmbracelet/crush/internal/ui/anim"
+	"github.com/charmbracelet/crush/internal/ui/attachments"
+	"github.com/charmbracelet/crush/internal/ui/list"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/stretchr/testify/require"
+)
+
+// versionedItem is the cross-cutting interface every chat item type
+// must satisfy under F6: every documented mutator must bump the
+// shared version counter so the list-level memo invalidates.
+type versionedItem interface {
+	list.Item
+	Version() uint64
+}
+
+// requireBump asserts that the supplied mutator advances the item's
+// Version(). The mutator runs once; an absent bump is a regression
+// (a finished item would keep serving stale frozen output to the
+// list cache).
+func requireBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Greaterf(t, after, before, "%s must bump Version() (before=%d, after=%d)", name, before, after)
+}
+
+// TestAssistantMessageItem_MutatorsBumpVersion enumerates every
+// documented mutator on AssistantMessageItem and asserts each one
+// advances Version().
+func TestAssistantMessageItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	build := func(thinking, content string) *message.Message {
+		parts := []message.ContentPart{
+			message.ReasoningContent{
+				Thinking:   thinking,
+				StartedAt:  testStartedAt,
+				FinishedAt: testFinishedAt,
+			},
+		}
+		if content != "" {
+			parts = append(parts, message.TextContent{Text: content})
+		}
+		return &message.Message{ID: "a-mut", Role: message.Assistant, Parts: parts}
+	}
+
+	item := NewAssistantMessageItem(&sty, build("thinking", "content")).(*AssistantMessageItem)
+
+	requireBump(t, "SetMessage", item, func() {
+		item.SetMessage(build("thinking", "more content"))
+	})
+	requireBump(t, "SetFocused", item, func() {
+		item.SetFocused(true)
+	})
+	requireBump(t, "SetHighlight", item, func() {
+		item.SetHighlight(0, 0, 0, 5)
+	})
+	// ToggleExpanded only mutates state when there is non-empty
+	// thinking text — which the build helper provides.
+	requireBump(t, "ToggleExpanded", item, func() {
+		item.ToggleExpanded()
+	})
+}
+
+// TestUserMessageItem_MutatorsBumpVersion enumerates UserMessageItem
+// mutators.
+func TestUserMessageItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	r := attachments.NewRenderer(
+		sty.Attachments.Normal,
+		sty.Attachments.Deleting,
+		sty.Attachments.Image,
+		sty.Attachments.Text,
+	)
+	msg := &message.Message{
+		ID:   "u-mut",
+		Role: message.User,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "Hello"},
+		},
+	}
+	item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem)
+
+	requireBump(t, "SetFocused", item, func() {
+		item.SetFocused(true)
+	})
+	requireBump(t, "SetHighlight", item, func() {
+		item.SetHighlight(0, 0, 0, 3)
+	})
+}
+
+// TestAssistantInfoItem_VersionedAndFinished sanity-checks the
+// AssistantInfoItem wiring. The item carries only immutable data
+// after construction; we still assert Version() is callable and
+// Finished() returns true.
+func TestAssistantInfoItem_VersionedAndFinished(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	cfg := &config.Config{}
+	msg := &message.Message{
+		ID:    "info",
+		Role:  message.Assistant,
+		Parts: []message.ContentPart{message.Finish{Reason: message.FinishReasonEndTurn, Time: time.Now().Unix()}},
+	}
+	item := NewAssistantInfoItem(&sty, msg, cfg, time.Unix(0, 0)).(*AssistantInfoItem)
+
+	require.True(t, item.Finished(), "AssistantInfoItem must be Finished()")
+	// Version() is callable and starts at zero.
+	require.Equal(t, uint64(0), item.Version())
+}
+
+// TestBaseToolMessageItem_MutatorsBumpVersion enumerates the base
+// tool item mutators. Specific tool types layer on top of this
+// base; the base bumps cover the shared mutator surface.
+func TestBaseToolMessageItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	tc := message.ToolCall{ID: "tc1", Name: "bash", Input: "{}", Finished: false}
+	item := NewToolMessageItem(&sty, "msg", tc, nil, false)
+
+	v := item.(versionedItem)
+
+	requireBump(t, "SetFocused", v, func() {
+		if f, ok := item.(list.Focusable); ok {
+			f.SetFocused(true)
+		}
+	})
+	requireBump(t, "SetHighlight", v, func() {
+		if h, ok := item.(list.Highlightable); ok {
+			h.SetHighlight(0, 0, 0, 3)
+		}
+	})
+	requireBump(t, "SetToolCall", v, func() {
+		tc2 := tc
+		tc2.Input = `{"command":"echo"}`
+		item.SetToolCall(tc2)
+	})
+	requireBump(t, "SetResult", v, func() {
+		item.SetResult(&message.ToolResult{ToolCallID: "tc1", Content: "ok"})
+	})
+	requireBump(t, "SetStatus", v, func() {
+		item.SetStatus(ToolStatusSuccess)
+	})
+	requireBump(t, "ToggleExpanded", v, func() {
+		if e, ok := item.(Expandable); ok {
+			e.ToggleExpanded()
+		}
+	})
+	requireBump(t, "SetCompact", v, func() {
+		if c, ok := item.(Compactable); ok {
+			c.SetCompact(true)
+		}
+	})
+}
+
+// TestAssistantMessageItem_AnimateBumpsVersion covers the spinner
+// regression: while the assistant message is spinning, every
+// anim.StepMsg fed through Animate must bump Version() so the
+// list-level cache invalidates and the next draw re-renders the
+// advanced spinner frame. Without this bump the cached entry's
+// version stays put and the spinner appears frozen.
+func TestAssistantMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	streaming := &message.Message{
+		ID:   "spin",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking..."},
+		},
+	}
+	item := NewAssistantMessageItem(&sty, streaming).(*AssistantMessageItem)
+
+	requireBump(t, "Animate", item, func() {
+		item.Animate(anim.StepMsg{})
+	})
+
+	// A non-spinning item must not bump on Animate: the bump only
+	// makes sense while the spinner is live, and a stray bump on a
+	// finished item would needlessly invalidate frozen entries.
+	finished := &message.Message{
+		ID:   "spin",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.TextContent{Text: "done"},
+			message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+		},
+	}
+	item.SetMessage(finished)
+	require.True(t, item.Finished(), "item must report Finished() once the message finishes")
+	before := item.Version()
+	item.Animate(anim.StepMsg{})
+	require.Equal(t, before, item.Version(), "Animate must not bump Version() on a non-spinning item")
+}
+
+// TestAssistantMessageItem_FinishedTransition covers §4.5.1: a
+// streaming assistant message reports Finished() == false; once the
+// message reports IsFinished() and stops spinning, Finished() must
+// return true.
+func TestAssistantMessageItem_FinishedTransition(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+
+	// Streaming: no finish part, no content yet — isSpinning == true.
+	streaming := &message.Message{
+		ID:   "stream",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking..."},
+		},
+	}
+	item := NewAssistantMessageItem(&sty, streaming).(*AssistantMessageItem)
+	require.False(t, item.Finished(), "streaming assistant message must not be Finished()")
+
+	// Finished with content.
+	finished := &message.Message{
+		ID:   "stream",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking", StartedAt: testStartedAt, FinishedAt: testFinishedAt},
+			message.TextContent{Text: "the answer"},
+			message.Finish{Reason: message.FinishReasonEndTurn, Time: testFinishTime},
+		},
+	}
+	item.SetMessage(finished)
+	require.True(t, item.Finished(), "finished assistant message must be Finished()")
+}
+
+// TestUserMessageItem_FinishedAlwaysTrue locks in the freezable
+// contract: user messages are never spinning.
+func TestUserMessageItem_FinishedAlwaysTrue(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	r := attachments.NewRenderer(
+		sty.Attachments.Normal,
+		sty.Attachments.Deleting,
+		sty.Attachments.Image,
+		sty.Attachments.Text,
+	)
+	msg := &message.Message{
+		ID:    "u-fin",
+		Role:  message.User,
+		Parts: []message.ContentPart{message.TextContent{Text: "hi"}},
+	}
+	item := NewUserMessageItem(&sty, msg, r).(*UserMessageItem)
+	require.True(t, item.Finished())
+}
+
+// TestAgentToolMessageItem_NestedToolMutatorsBumpVersion covers B1:
+// the nested-tool mutators on AgentToolMessageItem must bump
+// Version() so the list cache invalidates frozen entries when a
+// nested tool is added or the slice changes. SetNestedTools always
+// bumps unconditionally — the live update path in
+// internal/ui/model/ui.go mutates existing children in place and
+// then re-passes the same slice, so a pointer-equality dedupe would
+// hide observable child-render changes. AddNestedTool also always
+// observably mutates state and always bumps.
+func TestAgentToolMessageItem_NestedToolMutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "agent-parent", Name: "agent", Input: `{}`, Finished: false}
+	item := NewAgentToolMessageItem(&sty, parent, nil, false)
+
+	mkChild := func(id string) ToolMessageItem {
+		tc := message.ToolCall{ID: id, Name: "bash", Input: `{}`, Finished: false}
+		return NewToolMessageItem(&sty, "msg", tc, nil, false)
+	}
+
+	// AddNestedTool always bumps.
+	requireBump(t, "AddNestedTool", item, func() {
+		item.AddNestedTool(mkChild("c1"))
+	})
+
+	// SetNestedTools always bumps, even with a pointer-equal slice.
+	current := append([]ToolMessageItem(nil), item.NestedTools()...)
+	requireBump(t, "SetNestedTools[pointer-equal]", item, func() {
+		item.SetNestedTools(current)
+	})
+
+	// SetNestedTools with a different slice (extra element) bumps.
+	requireBump(t, "SetNestedTools[different]", item, func() {
+		item.SetNestedTools(append(current, mkChild("c2")))
+	})
+
+	// SetNestedTools to an empty slice from a non-empty state bumps.
+	requireBump(t, "SetNestedTools[empty]", item, func() {
+		item.SetNestedTools(nil)
+	})
+}
+
+// TestAgenticFetchToolMessageItem_NestedToolMutatorsBumpVersion is
+// the agentic-fetch counterpart to the agent-tool nested mutator
+// bump test above.
+func TestAgenticFetchToolMessageItem_NestedToolMutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "fetch-parent", Name: "agentic_fetch", Input: `{}`, Finished: false}
+	item := NewAgenticFetchToolMessageItem(&sty, parent, nil, false)
+
+	mkChild := func(id string) ToolMessageItem {
+		tc := message.ToolCall{ID: id, Name: "fetch", Input: `{}`, Finished: false}
+		return NewToolMessageItem(&sty, "msg", tc, nil, false)
+	}
+
+	requireBump(t, "AddNestedTool", item, func() {
+		item.AddNestedTool(mkChild("c1"))
+	})
+
+	current := append([]ToolMessageItem(nil), item.NestedTools()...)
+	requireBump(t, "SetNestedTools[pointer-equal]", item, func() {
+		item.SetNestedTools(current)
+	})
+
+	requireBump(t, "SetNestedTools[different]", item, func() {
+		item.SetNestedTools(append(current, mkChild("c2")))
+	})
+
+	requireBump(t, "SetNestedTools[empty]", item, func() {
+		item.SetNestedTools(nil)
+	})
+}
+
+// TestAgentToolMessageItem_NestedChildInPlaceMutationBumpsParent is
+// the T5 regression test: it mirrors the live update flow at
+// internal/ui/model/ui.go:1242-1281 where nested tool calls are
+// updated in place (SetToolCall / SetResult on the same child
+// pointers) and then the same slice is handed back to the parent
+// via SetNestedTools. The parent must still bump its version so
+// the list cache invalidates the parent's pre-rendered string and
+// the freshly-rendered child output becomes visible.
+func TestAgentToolMessageItem_NestedChildInPlaceMutationBumpsParent(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "agent-parent", Name: "agent", Input: `{}`, Finished: false}
+	item := NewAgentToolMessageItem(&sty, parent, nil, false)
+
+	childTC := message.ToolCall{ID: "c1", Name: "bash", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	item.AddNestedTool(child)
+
+	v0 := item.Version()
+	childVersionBefore := child.(versionedItem).Version()
+
+	// In-place mutate the existing child, exactly like the live
+	// flow in ui.go:1271-1278 does.
+	child.SetResult(&message.ToolResult{ToolCallID: "c1", Content: "ok"})
+	require.Greaterf(t, child.(versionedItem).Version(), childVersionBefore,
+		"child SetResult must bump child version")
+
+	// Hand the same slice back to the parent (pointers unchanged).
+	same := item.NestedTools()
+	item.SetNestedTools(same)
+	require.Greaterf(t, item.Version(), v0,
+		"parent SetNestedTools must bump even when child pointers are unchanged (in-place child mutation invalidates parent's pre-rendered output)")
+}
+
+// TestAgenticFetchToolMessageItem_NestedChildInPlaceMutationBumpsParent
+// is the agentic-fetch counterpart of the T5 regression test.
+func TestAgenticFetchToolMessageItem_NestedChildInPlaceMutationBumpsParent(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parent := message.ToolCall{ID: "fetch-parent", Name: "agentic_fetch", Input: `{}`, Finished: false}
+	item := NewAgenticFetchToolMessageItem(&sty, parent, nil, false)
+
+	childTC := message.ToolCall{ID: "c1", Name: "fetch", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	item.AddNestedTool(child)
+
+	v0 := item.Version()
+	childVersionBefore := child.(versionedItem).Version()
+
+	child.SetResult(&message.ToolResult{ToolCallID: "c1", Content: "ok"})
+	require.Greaterf(t, child.(versionedItem).Version(), childVersionBefore,
+		"child SetResult must bump child version")
+
+	same := item.NestedTools()
+	item.SetNestedTools(same)
+	require.Greaterf(t, item.Version(), v0,
+		"parent SetNestedTools must bump even when child pointers are unchanged")
+}
+
+// requireNoBump asserts the supplied mutator leaves the item's
+// Version() unchanged. The mutator runs once; an unexpected bump
+// would force the F6 list memo to re-render an item whose output
+// did not change, churning the cache.
+func requireNoBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Equalf(t, before, after,
+		"%s must not bump Version() (before=%d, after=%d)", name, before, after)
+}
+
+// TestBaseToolMessageItem_AnimateBumpsVersion is the spinner
+// regression test for non-agent tools: while the tool is spinning,
+// every anim.StepMsg whose ID matches the tool must bump Version()
+// so the list-level cache invalidates and the next draw re-renders
+// the advanced spinner frame. Foreign IDs must not bump (they would
+// churn the cache on every frame), and a finished tool must not
+// bump on any ID (the entry is frozen and stays frozen).
+func TestBaseToolMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	tc := message.ToolCall{ID: "tc-spin", Name: "bash", Input: "{}", Finished: false}
+	item := NewToolMessageItem(&sty, "msg", tc, nil, false)
+	v := item.(versionedItem)
+	a, ok := item.(Animatable)
+	require.True(t, ok, "base tool message item must implement Animatable")
+
+	// Spinning + matching ID → bump.
+	requireBump(t, "Animate[spinning,own ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: tc.ID})
+	})
+
+	// Spinning + foreign ID → no bump. Routing this StepMsg here at
+	// all would mean a future chat.Animate refactor; the item must
+	// be defensive against it so we don't churn the list cache.
+	requireNoBump(t, "Animate[spinning,foreign ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: "some-other-tool"})
+	})
+
+	// Finished → no bump on any ID. The entry is frozen; a stray
+	// bump would needlessly invalidate frozen entries.
+	tcFinished := tc
+	tcFinished.Finished = true
+	item.SetToolCall(tcFinished)
+	item.SetResult(&message.ToolResult{ToolCallID: tc.ID, Content: "ok"})
+	require.True(t, item.Finished(), "tool must report Finished() once the result lands")
+
+	requireNoBump(t, "Animate[finished,own ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: tc.ID})
+	})
+	requireNoBump(t, "Animate[finished,foreign ID]", v, func() {
+		a.Animate(anim.StepMsg{ID: "some-other-tool"})
+	})
+}
+
+// TestAgentToolMessageItem_AnimateBumpsVersion is the spinner
+// regression test for agent tools. The parent must bump on both
+// the parent-tick branch (msg.ID == parent.ID()) and the
+// nested-tick branch (msg.ID == nested.ID()) because the list
+// only checks the parent's version — nested tools are not list
+// entries of their own. Unrelated IDs must not bump, and a parent
+// with a result must not bump on any ID.
+func TestAgentToolMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parentTC := message.ToolCall{ID: "agent-parent", Name: "agent", Input: `{}`, Finished: false}
+	parent := NewAgentToolMessageItem(&sty, parentTC, nil, false)
+
+	childTC := message.ToolCall{ID: "agent-child", Name: "bash", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	parent.AddNestedTool(child)
+
+	// Spinning + parent's own ID → parent bumps.
+	requireBump(t, "Animate[spinning,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+
+	// Spinning + nested child ID → parent bumps. The list only
+	// invalidates on the parent; without this the nested
+	// spinner's frame would never reach the screen even though
+	// the nested anim's step has advanced.
+	requireBump(t, "Animate[spinning,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+
+	// Spinning + unrelated ID → no bump.
+	requireNoBump(t, "Animate[spinning,foreign ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: "unrelated"})
+	})
+
+	// Once the parent has a result, neither branch bumps.
+	parent.SetResult(&message.ToolResult{ToolCallID: parentTC.ID, Content: "done"})
+	requireNoBump(t, "Animate[finished,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+	requireNoBump(t, "Animate[finished,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+}
+
+// TestAgenticFetchToolMessageItem_AnimateBumpsVersion is the
+// agentic-fetch counterpart of the agent-tool Animate bump test.
+// Without an explicit override the embedded base Animate would
+// drop nested-child StepMsgs at anim.Animate's ID check and never
+// bump the parent on its own ticks; this test locks in the
+// override.
+func TestAgenticFetchToolMessageItem_AnimateBumpsVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	parentTC := message.ToolCall{ID: "fetch-parent", Name: "agentic_fetch", Input: `{}`, Finished: false}
+	parent := NewAgenticFetchToolMessageItem(&sty, parentTC, nil, false)
+
+	childTC := message.ToolCall{ID: "fetch-child", Name: "fetch", Input: `{}`, Finished: false}
+	child := NewToolMessageItem(&sty, "msg", childTC, nil, false)
+	parent.AddNestedTool(child)
+
+	requireBump(t, "Animate[spinning,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+	requireBump(t, "Animate[spinning,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+	requireNoBump(t, "Animate[spinning,foreign ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: "unrelated"})
+	})
+
+	parent.SetResult(&message.ToolResult{ToolCallID: parentTC.ID, Content: "done"})
+	requireNoBump(t, "Animate[finished,parent ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: parentTC.ID})
+	})
+	requireNoBump(t, "Animate[finished,nested ID]", parent, func() {
+		parent.Animate(anim.StepMsg{ID: childTC.ID})
+	})
+}
+
+// TestBaseToolMessageItem_FinishedTransition covers §4.5.1 for
+// tools: a still-running tool reports Finished() == false; once the
+// tool call is marked finished and a result lands, Finished()
+// returns true. Cancelled tools also become Finished.
+func TestBaseToolMessageItem_FinishedTransition(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	tc := message.ToolCall{ID: "tc-fin", Name: "bash", Input: "{}", Finished: false}
+	item := NewToolMessageItem(&sty, "msg", tc, nil, false)
+	require.False(t, item.Finished(), "running tool must not be Finished()")
+
+	tcFinished := tc
+	tcFinished.Finished = true
+	item.SetToolCall(tcFinished)
+	item.SetResult(&message.ToolResult{ToolCallID: "tc-fin", Content: "ok"})
+	require.True(t, item.Finished(), "finished tool with result must be Finished()")
+
+	// Canceled tool with no result is also Finished.
+	tcCanceled := message.ToolCall{ID: "tc-cancel", Name: "bash", Input: "{}", Finished: false}
+	canceled := NewToolMessageItem(&sty, "msg", tcCanceled, nil, true)
+	require.True(t, canceled.Finished(), "canceled tool must be Finished()")
+}

internal/ui/common/markdown.go 🔗

@@ -19,6 +19,12 @@ func init() {
 	formatters.Register(formatterName, xchroma.Formatter(zero, nil))
 }
 
+// mdCacheMu guards mdCache and quietMDCache.
+//
+// Lock ordering: when both mdCacheMu and rendererLocksMu are
+// needed (only in InvalidateMarkdownRendererCache), acquire
+// mdCacheMu FIRST, then rendererLocksMu. No other call site may
+// hold rendererLocksMu while acquiring mdCacheMu.
 var (
 	mdCacheMu    sync.Mutex
 	mdCache      = map[int]*glamour.TermRenderer{}
@@ -29,6 +35,13 @@ var (
 // the given styles and width. Renderers are memoized per width and shared
 // across callers; call InvalidateMarkdownRendererCache when the active
 // styles change.
+//
+// The returned renderer is NOT safe for concurrent Render calls
+// (goldmark's BlockStack carries state across the public Render
+// API). Crush's TUI is single-threaded so production never
+// contends, but parallel callers (most notably parallel tests)
+// must serialize via [LockMarkdownRenderer]. Treat the renderer
+// as effectively pinned to one goroutine at a time.
 func MarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer {
 	mdCacheMu.Lock()
 	defer mdCacheMu.Unlock()
@@ -46,7 +59,8 @@ func MarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer {
 
 // QuietMarkdownRenderer returns a glamour [glamour.TermRenderer] with no colors
 // (plain text with structure) and the given width. Renderers are memoized per
-// width and shared across callers.
+// width and shared across callers. Same concurrency contract as
+// [MarkdownRenderer]: serialize via [LockMarkdownRenderer].
 func QuietMarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer {
 	mdCacheMu.Lock()
 	defer mdCacheMu.Unlock()
@@ -62,12 +76,64 @@ func QuietMarkdownRenderer(sty *styles.Styles, width int) *glamour.TermRenderer
 	return r
 }
 
-// InvalidateMarkdownRendererCache drops every cached renderer. Call this
-// whenever the active styles change so subsequent renderers pick up the new
-// ansi.StyleConfig.
+// InvalidateMarkdownRendererCache drops every cached renderer
+// AND every per-renderer mutex in a single atomic critical
+// section so the two maps cannot disagree mid-toggle. Call this
+// whenever the active styles change so subsequent renderers
+// pick up the new ansi.StyleConfig.
+//
+// Existing holders of an old mutex (mid-Render goroutines) keep
+// their reference safely; new renderers minted after the
+// invalidation get freshly minted mutexes.
+//
+// Lock ordering: mdCacheMu is acquired first, then
+// rendererLocksMu — see the comments on each mutex.
 func InvalidateMarkdownRendererCache() {
 	mdCacheMu.Lock()
 	defer mdCacheMu.Unlock()
+	rendererLocksMu.Lock()
+	defer rendererLocksMu.Unlock()
+
 	mdCache = map[int]*glamour.TermRenderer{}
 	quietMDCache = map[int]*glamour.TermRenderer{}
+	rendererLocks = map[*glamour.TermRenderer]*sync.Mutex{}
+}
+
+// rendererLocksMu guards rendererLocks. We key per-renderer
+// mutexes by pointer so the lock granularity matches the
+// renderer cache granularity (one mutex per (width, palette)
+// renderer instance, not one mutex for the entire cache).
+//
+// Lock ordering: when both mdCacheMu and rendererLocksMu are
+// needed (only in InvalidateMarkdownRendererCache), acquire
+// mdCacheMu FIRST, then rendererLocksMu.
+var (
+	rendererLocksMu sync.Mutex
+	rendererLocks   = map[*glamour.TermRenderer]*sync.Mutex{}
+)
+
+// LockMarkdownRenderer returns the per-renderer mutex used to
+// serialize concurrent Render calls on a shared
+// [glamour.TermRenderer] instance. The returned [*sync.Mutex] is
+// stable for the lifetime of the renderer (i.e. until
+// [InvalidateMarkdownRendererCache] is called).
+//
+// Callers that issue more than one Render call in the same
+// logical operation should hold the mutex for the entire
+// sequence so other goroutines do not interleave their own
+// Render calls and corrupt the renderer state. F8's
+// streamingMarkdown is the immediate consumer; other call
+// sites that today issue exactly one Render call per item
+// render are safe without locking under the single-threaded
+// TUI Update loop, but should adopt this lock if they ever run
+// in parallel (e.g. background prerender workers).
+func LockMarkdownRenderer(r *glamour.TermRenderer) *sync.Mutex {
+	rendererLocksMu.Lock()
+	defer rendererLocksMu.Unlock()
+	if mu, ok := rendererLocks[r]; ok {
+		return mu
+	}
+	mu := &sync.Mutex{}
+	rendererLocks[r] = mu
+	return mu
 }

internal/ui/completions/item.go 🔗

@@ -1,6 +1,8 @@
 package completions
 
 import (
+	"slices"
+
 	"charm.land/lipgloss/v2"
 	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/x/ansi"
@@ -23,6 +25,8 @@ type ResourceCompletionValue struct {
 
 // CompletionItem represents an item in the completions list.
 type CompletionItem struct {
+	*list.Versioned
+
 	text    string
 	value   any
 	match   fuzzy.Match
@@ -38,6 +42,7 @@ type CompletionItem struct {
 // NewCompletionItem creates a new completion item.
 func NewCompletionItem(text string, value any, normalStyle, focusedStyle, matchStyle lipgloss.Style) *CompletionItem {
 	return &CompletionItem{
+		Versioned:    list.NewVersioned(),
 		text:         text,
 		value:        value,
 		normalStyle:  normalStyle,
@@ -46,6 +51,15 @@ func NewCompletionItem(text string, value any, normalStyle, focusedStyle, matchS
 	}
 }
 
+// Finished implements list.Item. Completion items render purely from
+// (text, match, focus); any mutation (SetMatch / SetFocused) bumps
+// Version() so the frozen cache entry invalidates on the next
+// render. Marking them finished lets the F6 list memo skip the
+// per-line work for the steady completions popup.
+func (c *CompletionItem) Finished() bool {
+	return true
+}
+
 // Text returns the display text of the item.
 func (c *CompletionItem) Text() string {
 	return c.text
@@ -63,16 +77,34 @@ func (c *CompletionItem) Filter() string {
 
 // SetMatch implements [list.MatchSettable].
 func (c *CompletionItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(c.match, m) {
+		return
+	}
 	c.cache = nil
 	c.match = m
+	c.Bump()
+}
+
+// sameFuzzyMatch reports whether two fuzzy.Match values are
+// observably equal. Because Match contains a slice (MatchedIndexes)
+// it is not directly comparable with ==; we compare the scalar
+// fields and then walk the indexes. SetMatch uses this to skip
+// gratuitous version bumps when the same match is reapplied.
+func sameFuzzyMatch(a, b fuzzy.Match) bool {
+	return a.Str == b.Str &&
+		a.Index == b.Index &&
+		a.Score == b.Score &&
+		slices.Equal(a.MatchedIndexes, b.MatchedIndexes)
 }
 
 // SetFocused implements [list.Focusable].
 func (c *CompletionItem) SetFocused(focused bool) {
-	if c.focused != focused {
-		c.cache = nil
+	if c.focused == focused {
+		return
 	}
+	c.cache = nil
 	c.focused = focused
+	c.Bump()
 }
 
 // Render implements [list.Item].

internal/ui/completions/version_bump_test.go 🔗

@@ -0,0 +1,85 @@
+package completions
+
+import (
+	"testing"
+
+	"charm.land/lipgloss/v2"
+	"github.com/sahilm/fuzzy"
+	"github.com/stretchr/testify/require"
+)
+
+// TestCompletionItem_MutatorsBumpVersion covers F6 §4.5 for the
+// completions popup: SetMatch and SetFocused must bump Version()
+// when the observable state changes, and dedupe (no bump) when the
+// supplied value is identical to the current state. Without
+// dedupe, the steady completions popup would invalidate the
+// list-level memo on every keystroke that produced the same match.
+func TestCompletionItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	mkItem := func() *CompletionItem {
+		return NewCompletionItem(
+			"internal/ui/chat/user.go",
+			FileCompletionValue{Path: "internal/ui/chat/user.go"},
+			lipgloss.NewStyle(),
+			lipgloss.NewStyle(),
+			lipgloss.NewStyle(),
+		)
+	}
+
+	t.Run("SetFocused", func(t *testing.T) {
+		t.Parallel()
+		item := mkItem()
+
+		// First transition (false -> true) must bump.
+		before := item.Version()
+		item.SetFocused(true)
+		require.Greater(t, item.Version(), before, "SetFocused(true) must bump")
+
+		// Re-applying the same focus state must not bump.
+		stable := item.Version()
+		item.SetFocused(true)
+		require.Equal(t, stable, item.Version(), "SetFocused with same value must not bump")
+
+		// Transition back must bump.
+		item.SetFocused(false)
+		require.Greater(t, item.Version(), stable, "SetFocused(false) must bump")
+	})
+
+	t.Run("SetMatch", func(t *testing.T) {
+		t.Parallel()
+		item := mkItem()
+
+		match := fuzzy.Match{
+			Str:            "user",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 1, 2, 3},
+		}
+		before := item.Version()
+		item.SetMatch(match)
+		require.Greater(t, item.Version(), before, "SetMatch with new value must bump")
+
+		// Re-applying an equivalent match (same fields, equal slice
+		// contents) must not bump.
+		stable := item.Version()
+		same := fuzzy.Match{
+			Str:            "user",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 1, 2, 3},
+		}
+		item.SetMatch(same)
+		require.Equal(t, stable, item.Version(), "SetMatch with equivalent value must not bump")
+
+		// A different match (different MatchedIndexes) must bump.
+		different := fuzzy.Match{
+			Str:            "user",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 2},
+		}
+		item.SetMatch(different)
+		require.Greater(t, item.Version(), stable, "SetMatch with different indexes must bump")
+	})
+}

internal/ui/dialog/commands_item.go 🔗

@@ -3,12 +3,14 @@ package dialog
 import (
 	"strings"
 
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/sahilm/fuzzy"
 )
 
 // CommandItem wraps a uicmd.Command to implement the ListItem interface.
 type CommandItem struct {
+	*list.Versioned
 	id       string
 	title    string
 	shortcut string
@@ -20,19 +22,26 @@ type CommandItem struct {
 	focused  bool
 }
 
-var _ ListItem = &CommandItem{}
+var _ ListItem = &CommandItem{Versioned: list.NewVersioned()}
 
 // NewCommandItem creates a new CommandItem.
 func NewCommandItem(t *styles.Styles, id, title, shortcut string, action Action) *CommandItem {
 	return &CommandItem{
-		id:       id,
-		t:        t,
-		title:    title,
-		shortcut: shortcut,
-		action:   action,
+		Versioned: list.NewVersioned(),
+		id:        id,
+		t:         t,
+		title:     title,
+		shortcut:  shortcut,
+		action:    action,
 	}
 }
 
+// Finished implements list.Item. Command items are render-stable
+// outside of explicit SetFocused / SetMatch.
+func (c *CommandItem) Finished() bool {
+	return true
+}
+
 // WithAliases returns the CommandItem with the given aliases for filtering.
 func (c *CommandItem) WithAliases(aliases ...string) *CommandItem {
 	c.aliases = aliases
@@ -54,16 +63,26 @@ func (c *CommandItem) ID() string {
 
 // SetFocused implements ListItem.
 func (c *CommandItem) SetFocused(focused bool) {
-	if c.focused != focused {
-		c.cache = nil
+	if c.focused == focused {
+		return
 	}
+	c.cache = nil
 	c.focused = focused
+	if c.Versioned != nil {
+		c.Bump()
+	}
 }
 
 // SetMatch implements ListItem.
 func (c *CommandItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(c.m, m) {
+		return
+	}
 	c.cache = nil
 	c.m = m
+	if c.Versioned != nil {
+		c.Bump()
+	}
 }
 
 // Action returns the action associated with the command item.

internal/ui/dialog/models_item.go 🔗

@@ -5,6 +5,7 @@ import (
 	"charm.land/lipgloss/v2"
 	"github.com/charmbracelet/crush/internal/config"
 	"github.com/charmbracelet/crush/internal/ui/common"
+	"github.com/charmbracelet/crush/internal/ui/list"
 	"github.com/charmbracelet/crush/internal/ui/styles"
 	"github.com/charmbracelet/x/ansi"
 	"github.com/sahilm/fuzzy"
@@ -12,6 +13,7 @@ import (
 
 // ModelGroup represents a group of model items.
 type ModelGroup struct {
+	*list.Versioned
 	Title      string
 	Items      []*ModelItem
 	configured bool
@@ -21,6 +23,7 @@ type ModelGroup struct {
 // NewModelGroup creates a new ModelGroup.
 func NewModelGroup(t *styles.Styles, title string, configured bool, items ...*ModelItem) ModelGroup {
 	return ModelGroup{
+		Versioned:  list.NewVersioned(),
 		Title:      title,
 		Items:      items,
 		configured: configured,
@@ -28,6 +31,11 @@ func NewModelGroup(t *styles.Styles, title string, configured bool, items ...*Mo
 	}
 }
 
+// Finished implements list.Item. Model groups are immutable headers.
+func (m *ModelGroup) Finished() bool {
+	return true
+}
+
 // AppendItems appends [ModelItem]s to the group.
 func (m *ModelGroup) AppendItems(items ...*ModelItem) {
 	m.Items = append(m.Items, items...)
@@ -50,6 +58,8 @@ func (m *ModelGroup) Render(width int) string {
 
 // ModelItem represents a list item for a model type.
 type ModelItem struct {
+	*list.Versioned
+
 	prov      catwalk.Provider
 	model     catwalk.Model
 	modelType ModelType
@@ -61,6 +71,12 @@ type ModelItem struct {
 	showProvider bool
 }
 
+// Finished implements list.Item. Model items are render-stable
+// outside of explicit SetFocused / SetMatch.
+func (m *ModelItem) Finished() bool {
+	return true
+}
+
 // SelectedModel returns this model item as a [config.SelectedModel] instance.
 func (m *ModelItem) SelectedModel() config.SelectedModel {
 	return config.SelectedModel{
@@ -81,6 +97,7 @@ var _ ListItem = &ModelItem{}
 // NewModelItem creates a new ModelItem.
 func NewModelItem(t *styles.Styles, prov catwalk.Provider, model catwalk.Model, typ ModelType, showProvider bool) *ModelItem {
 	return &ModelItem{
+		Versioned:    list.NewVersioned(),
 		prov:         prov,
 		model:        model,
 		modelType:    typ,
@@ -117,14 +134,24 @@ func (m *ModelItem) Render(width int) string {
 
 // SetFocused implements ListItem.
 func (m *ModelItem) SetFocused(focused bool) {
-	if m.focused != focused {
-		m.cache = nil
+	if m.focused == focused {
+		return
 	}
+	m.cache = nil
 	m.focused = focused
+	if m.Versioned != nil {
+		m.Bump()
+	}
 }
 
 // SetMatch implements ListItem.
 func (m *ModelItem) SetMatch(fm fuzzy.Match) {
+	if sameFuzzyMatch(m.m, fm) {
+		return
+	}
 	m.cache = nil
 	m.m = fm
+	if m.Versioned != nil {
+		m.Bump()
+	}
 }

internal/ui/dialog/reasoning.go 🔗

@@ -40,6 +40,7 @@ type Reasoning struct {
 
 // ReasoningItem represents a reasoning effort list item.
 type ReasoningItem struct {
+	*list.Versioned
 	effort    string
 	title     string
 	isCurrent bool
@@ -49,6 +50,12 @@ type ReasoningItem struct {
 	focused   bool
 }
 
+// Finished implements list.Item. Reasoning items are render-stable
+// outside of explicit SetFocused / SetMatch.
+func (r *ReasoningItem) Finished() bool {
+	return true
+}
+
 var (
 	_ Dialog   = (*Reasoning)(nil)
 	_ ListItem = (*ReasoningItem)(nil)
@@ -243,6 +250,7 @@ func (r *Reasoning) setReasoningItems() error {
 	selectedIndex := 0
 	for i, effort := range model.ReasoningLevels {
 		item := &ReasoningItem{
+			Versioned: list.NewVersioned(),
 			effort:    effort,
 			title:     common.FormatReasoningEffort(effort),
 			isCurrent: effort == currentEffort,
@@ -272,16 +280,26 @@ func (r *ReasoningItem) ID() string {
 
 // SetFocused sets the focus state of the reasoning item.
 func (r *ReasoningItem) SetFocused(focused bool) {
-	if r.focused != focused {
-		r.cache = nil
+	if r.focused == focused {
+		return
 	}
+	r.cache = nil
 	r.focused = focused
+	if r.Versioned != nil {
+		r.Bump()
+	}
 }
 
 // SetMatch sets the fuzzy match for the reasoning item.
 func (r *ReasoningItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(r.m, m) {
+		return
+	}
 	r.cache = nil
 	r.m = m
+	if r.Versioned != nil {
+		r.Bump()
+	}
 }
 
 // Render returns the string representation of the reasoning item.

internal/ui/dialog/sessions_item.go 🔗

@@ -2,6 +2,7 @@ package dialog
 
 import (
 	"fmt"
+	"slices"
 	"strings"
 	"time"
 
@@ -17,6 +18,19 @@ import (
 	"github.com/sahilm/fuzzy"
 )
 
+// sameFuzzyMatch reports whether two fuzzy.Match values are
+// observably equal. Because Match contains a slice (MatchedIndexes)
+// it is not directly comparable with ==; we compare the scalar
+// fields and then walk the indexes. The dialog list items use this
+// to skip gratuitous version bumps when SetMatch reapplies the same
+// match.
+func sameFuzzyMatch(a, b fuzzy.Match) bool {
+	return a.Str == b.Str &&
+		a.Index == b.Index &&
+		a.Score == b.Score &&
+		slices.Equal(a.MatchedIndexes, b.MatchedIndexes)
+}
+
 // ListItem represents a selectable and searchable item in a dialog list.
 type ListItem interface {
 	list.FilterableItem
@@ -29,6 +43,7 @@ type ListItem interface {
 
 // SessionItem wraps a [session.Session] to implement the [ListItem] interface.
 type SessionItem struct {
+	*list.Versioned
 	session.Session
 	t                *styles.Styles
 	sessionsMode     sessionsMode
@@ -38,6 +53,13 @@ type SessionItem struct {
 	focused          bool
 }
 
+// Finished implements list.Item. Session items are render-stable
+// outside of explicit SetFocused / SetMatch calls, both of which
+// bump Version() and therefore invalidate the F6 frozen entry.
+func (s *SessionItem) Finished() bool {
+	return true
+}
+
 var _ ListItem = &SessionItem{}
 
 // Filter returns the filterable value of the session.
@@ -52,8 +74,14 @@ func (s *SessionItem) ID() string {
 
 // SetMatch sets the fuzzy match for the session item.
 func (s *SessionItem) SetMatch(m fuzzy.Match) {
+	if sameFuzzyMatch(s.m, m) {
+		return
+	}
 	s.cache = nil
 	s.m = m
+	if s.Versioned != nil {
+		s.Bump()
+	}
 }
 
 // InputValue returns the updated title value
@@ -177,10 +205,14 @@ func renderItem(t ListItemStyles, title string, info string, focused bool, width
 
 // SetFocused sets the focus state of the session item.
 func (s *SessionItem) SetFocused(focused bool) {
-	if s.focused != focused {
-		s.cache = nil
+	if s.focused == focused {
+		return
 	}
+	s.cache = nil
 	s.focused = focused
+	if s.Versioned != nil {
+		s.Bump()
+	}
 }
 
 // sessionItems takes a slice of [session.Session]s and convert them to a slice
@@ -188,7 +220,7 @@ func (s *SessionItem) SetFocused(focused bool) {
 func sessionItems(t *styles.Styles, mode sessionsMode, sessions ...session.Session) []list.FilterableItem {
 	items := make([]list.FilterableItem, len(sessions))
 	for i, s := range sessions {
-		item := &SessionItem{Session: s, t: t, sessionsMode: mode}
+		item := &SessionItem{Versioned: list.NewVersioned(), Session: s, t: t, sessionsMode: mode}
 		if mode == sessionsModeUpdating {
 			item.updateTitleInput = textinput.New()
 			item.updateTitleInput.SetVirtualCursor(false)

internal/ui/dialog/version_bump_test.go 🔗

@@ -0,0 +1,219 @@
+package dialog
+
+import (
+	"testing"
+
+	"charm.land/catwalk/pkg/catwalk"
+	"github.com/charmbracelet/crush/internal/session"
+	"github.com/charmbracelet/crush/internal/ui/list"
+	"github.com/charmbracelet/crush/internal/ui/styles"
+	"github.com/sahilm/fuzzy"
+	"github.com/stretchr/testify/require"
+)
+
+// versionedItem is the cross-cutting interface every dialog list
+// item must satisfy under F6: every documented mutator must bump
+// the shared version counter so the list-level memo invalidates
+// frozen entries.
+type versionedItem interface {
+	list.Item
+	Version() uint64
+}
+
+// requireBump asserts that running mutate() advances the item's
+// Version().
+func requireBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Greaterf(t, after, before, "%s must bump Version() (before=%d, after=%d)", name, before, after)
+}
+
+// requireNoBump asserts that running mutate() leaves the item's
+// Version() unchanged. Used to lock in the dedupe contract: a
+// mutator called with a value identical to the current state must
+// not gratuitously invalidate the list cache.
+func requireNoBump(t *testing.T, name string, item versionedItem, mutate func()) {
+	t.Helper()
+	before := item.Version()
+	mutate()
+	after := item.Version()
+	require.Equalf(t, before, after, "%s must NOT bump Version() when state is unchanged (before=%d, after=%d)", name, before, after)
+}
+
+// equivMatch returns a fuzzy.Match whose fields and indexes are
+// equivalent to the supplied seed but allocated as a fresh struct
+// so callers exercise the value-equality dedupe path rather than
+// referential equality.
+func equivMatch(seed fuzzy.Match) fuzzy.Match {
+	return fuzzy.Match{
+		Str:            seed.Str,
+		Index:          seed.Index,
+		Score:          seed.Score,
+		MatchedIndexes: append([]int(nil), seed.MatchedIndexes...),
+	}
+}
+
+// TestCommandItem_MutatorsBumpVersion covers F6 §4.5 for the
+// commands palette items: SetFocused and SetMatch bump Version()
+// on observable change and dedupe otherwise.
+func TestCommandItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	item := NewCommandItem(&sty, "id", "Title", "ctrl+t", nil)
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+	requireBump(t, "SetFocused[false]", item, func() {
+		item.SetFocused(false)
+	})
+
+	match := fuzzy.Match{
+		Str:            "Title",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "Title",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{0, 2},
+		})
+	})
+}
+
+// TestModelItem_MutatorsBumpVersion covers F6 §4.5 for the model
+// picker items.
+func TestModelItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	prov := catwalk.Provider{ID: "openai", Name: "OpenAI"}
+	model := catwalk.Model{ID: "gpt-4", Name: "GPT-4"}
+	item := NewModelItem(&sty, prov, model, ModelTypeLarge, true)
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+
+	match := fuzzy.Match{
+		Str:            "GPT-4",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "GPT-4",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{1},
+		})
+	})
+}
+
+// TestSessionItem_MutatorsBumpVersion covers F6 §4.5 for the
+// sessions dialog items.
+func TestSessionItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	item := &SessionItem{
+		Versioned: list.NewVersioned(),
+		Session:   session.Session{ID: "sess-1", Title: "My Session"},
+		t:         &sty,
+	}
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+
+	match := fuzzy.Match{
+		Str:            "My Session",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "My Session",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{3, 4},
+		})
+	})
+}
+
+// TestReasoningItem_MutatorsBumpVersion covers F6 §4.5 for the
+// reasoning effort dialog items.
+func TestReasoningItem_MutatorsBumpVersion(t *testing.T) {
+	t.Parallel()
+
+	sty := styles.CharmtonePantera()
+	item := &ReasoningItem{
+		Versioned: list.NewVersioned(),
+		effort:    "medium",
+		title:     "Medium",
+		t:         &sty,
+	}
+
+	requireBump(t, "SetFocused[true]", item, func() {
+		item.SetFocused(true)
+	})
+	requireNoBump(t, "SetFocused[true again]", item, func() {
+		item.SetFocused(true)
+	})
+
+	match := fuzzy.Match{
+		Str:            "Medium",
+		Index:          0,
+		Score:          5,
+		MatchedIndexes: []int{0, 1, 2},
+	}
+	requireBump(t, "SetMatch[new]", item, func() {
+		item.SetMatch(match)
+	})
+	requireNoBump(t, "SetMatch[same]", item, func() {
+		item.SetMatch(equivMatch(match))
+	})
+	requireBump(t, "SetMatch[different]", item, func() {
+		item.SetMatch(fuzzy.Match{
+			Str:            "Medium",
+			Index:          0,
+			Score:          5,
+			MatchedIndexes: []int{2, 3},
+		})
+	})
+}

internal/ui/list/item.go 🔗

@@ -7,10 +7,66 @@ import (
 )
 
 // Item represents a single item in the lazy-loaded list.
+//
+// Items participate in the list-level render memo (F6). The cache key
+// for each item is (pointer, width, version). Items must:
+//
+//   - Bump their version (via the embedded *Versioned helper) on every
+//     mutation that changes the rendered output.
+//   - Return Finished() == true once their rendered output will not
+//     change again unless an explicit mutator is invoked. Frozen
+//     entries are emitted verbatim — no Render call — until either
+//     Version() bumps, the viewport width changes, or the list
+//     explicitly invalidates the entry.
 type Item interface {
 	// Render returns the string representation of the item for the given
 	// width.
 	Render(width int) string
+
+	// Version returns a monotonic counter that the list-level cache
+	// uses to detect mutations. Items must increment the version
+	// (via Versioned.Bump) on every state change that would alter
+	// the rendered output.
+	Version() uint64
+
+	// Finished reports whether the item's rendered output has reached
+	// a terminal state and may be frozen by the list cache. Items
+	// that animate, stream, or otherwise still mutate must return
+	// false. A finished item that later mutates must bump its
+	// version on the mutation; the cache treats version bumps as
+	// implicit unfreeze + invalidate.
+	Finished() bool
+}
+
+// Versioned is a tiny embeddable helper that satisfies Item.Version()
+// and provides a Bump() method to call from every state-mutating
+// method. Items typically embed *Versioned alongside their other
+// helpers; see chat.AssistantMessageItem for the canonical wiring.
+//
+// Bump() is not safe for concurrent use; callers must hold whatever
+// synchronization their item type already requires for state
+// mutations. The list itself never reads Version() from a goroutine
+// other than the UI thread.
+type Versioned struct {
+	v uint64
+}
+
+// NewVersioned returns a fresh *Versioned at version zero.
+func NewVersioned() *Versioned {
+	return &Versioned{}
+}
+
+// Version returns the current version counter.
+func (vc *Versioned) Version() uint64 {
+	return vc.v
+}
+
+// Bump advances the version counter by one. Mutators on items that
+// affect the rendered output must call Bump exactly once per
+// observable state change. Bumping more than once per change is
+// harmless other than a single extra cache miss.
+func (vc *Versioned) Bump() {
+	vc.v++
 }
 
 // RawRenderable represents an item that can provide a raw rendering
@@ -45,13 +101,15 @@ type MouseClickable interface {
 
 // SpacerItem is a spacer item that adds vertical space in the list.
 type SpacerItem struct {
+	*Versioned
 	Height int
 }
 
 // NewSpacerItem creates a new [SpacerItem] with the specified height.
 func NewSpacerItem(height int) *SpacerItem {
 	return &SpacerItem{
-		Height: max(0, height-1),
+		Versioned: NewVersioned(),
+		Height:    max(0, height-1),
 	}
 }
 
@@ -59,3 +117,10 @@ func NewSpacerItem(height int) *SpacerItem {
 func (s *SpacerItem) Render(width int) string {
 	return strings.Repeat("\n", s.Height)
 }
+
+// Finished implements Item. SpacerItems are immutable in practice and
+// safe to freeze; any mutation goes through Versioned.Bump which
+// invalidates the frozen entry.
+func (s *SpacerItem) Finished() bool {
+	return true
+}

internal/ui/list/list.go 🔗

@@ -33,9 +33,40 @@ type List struct {
 
 	// renderCallbacks is a list of callbacks to apply when rendering items.
 	renderCallbacks []func(idx, selectedIdx int, item Item) Item
+
+	// cache is the F6 list-level render memo, keyed by item pointer.
+	// Each entry stores the rendered content, a pre-split slice of
+	// lines (so AtBottom / Render / VisibleItemIndices /
+	// findItemAtY all share one render per frame), the height, and
+	// the keys that govern invalidation (width and version). The
+	// frozen flag mirrors §4.5.1: once a Finished() item is
+	// rendered, subsequent draws return the stored output verbatim
+	// without calling back into Render.
+	cache map[Item]*listCacheEntry
+
+	// freezeSuppressed marks items the list must not freeze on the
+	// next render even when their Finished() reports true. This is
+	// the §4.5.1 selection-drag escape hatch (option (a)): items
+	// inside an active selection range render as live items so that
+	// per-line highlight overlays land on the latest content. Cleared
+	// on EndSelectionDrag.
+	freezeSuppressed map[Item]struct{}
+}
+
+// listCacheEntry is the per-item entry in the list-level render memo.
+type listCacheEntry struct {
+	width   int
+	version uint64
+	frozen  bool
+	content string
+	lines   []string
+	height  int
 }
 
-// renderedItem holds the rendered content and height of an item.
+// renderedItem is the legacy view of a cached entry returned by getItem.
+// Internal callers that don't need the line slice keep using this
+// shape; functions that walk lines (Render) take the slice off the
+// cache entry directly.
 type renderedItem struct {
 	content string
 	height  int
@@ -46,6 +77,8 @@ func NewList(items ...Item) *List {
 	l := new(List)
 	l.items = items
 	l.selectedIdx = -1
+	l.cache = make(map[Item]*listCacheEntry)
+	l.freezeSuppressed = make(map[Item]struct{})
 	return l
 }
 
@@ -59,8 +92,13 @@ func (l *List) RegisterRenderCallback(cb RenderCallback) {
 	l.renderCallbacks = append(l.renderCallbacks, cb)
 }
 
-// SetSize sets the size of the list viewport.
+// SetSize sets the size of the list viewport. A width change drops the
+// entire render cache because every entry's wrapped output depends on
+// width; a height-only change is a no-op for the cache.
 func (l *List) SetSize(width, height int) {
+	if l.width != width {
+		l.invalidateAll()
+	}
 	l.width = width
 	l.height = height
 }
@@ -144,12 +182,42 @@ func (l *List) lastOffsetItem() (int, int, int) {
 }
 
 // getItem renders (if needed) and returns the item at the given index.
+// The result is served from the F6 cache when possible — see
+// renderItemEntry for the cache-key semantics.
 func (l *List) getItem(idx int) renderedItem {
 	if idx < 0 || idx >= len(l.items) {
 		return renderedItem{}
 	}
+	entry := l.renderItemEntry(idx)
+	if entry == nil {
+		return renderedItem{}
+	}
+	return renderedItem{content: entry.content, height: entry.height}
+}
+
+// renderItemEntry returns the cache entry for the given index, populating
+// the cache on miss. The result must not be retained past the next
+// invalidation (SetSize width change, SetItems, etc.).
+//
+// Render callbacks always run, even for frozen entries: callbacks
+// are how the list discovers per-frame state changes (selection,
+// highlight range) and they bump the item's version when those
+// changes affect the rendered output. A frozen item whose callback
+// run is a no-op (same focus, same highlight) keeps its stored
+// version and the cache hit is preserved on the post-callback
+// version check.
+func (l *List) renderItemEntry(idx int) *listCacheEntry {
+	if idx < 0 || idx >= len(l.items) {
+		return nil
+	}
 
-	item := l.items[idx]
+	rawItem := l.items[idx]
+	entry := l.cache[rawItem]
+
+	// Run render callbacks. Callbacks may mutate the item (focus,
+	// highlight) which in turn bumps its version when state actually
+	// changes. We capture the post-callback version below.
+	item := rawItem
 	if len(l.renderCallbacks) > 0 {
 		for _, cb := range l.renderCallbacks {
 			if it := cb(idx, l.selectedIdx, item); it != nil {
@@ -158,15 +226,128 @@ func (l *List) getItem(idx int) renderedItem {
 		}
 	}
 
+	version := rawItem.Version()
+	if entry != nil && entry.width == l.width && entry.version == version {
+		// Cache hit — frozen or unfrozen, the entry content is
+		// still correct because no version bump landed since the
+		// last render. Selection-drag suppression turns this into
+		// a miss only if the entry is frozen.
+		if !entry.frozen {
+			return entry
+		}
+		if _, suppressed := l.freezeSuppressed[rawItem]; !suppressed {
+			return entry
+		}
+	}
+
 	rendered := item.Render(l.width)
 	rendered = strings.TrimRight(rendered, "\n")
-	height := strings.Count(rendered, "\n") + 1
-	ri := renderedItem{
-		content: rendered,
-		height:  height,
+	lines := strings.Split(rendered, "\n")
+	height := len(lines)
+
+	// Re-read the version after Render so that any version bumps
+	// caused by Render itself (e.g. an item that mutates internal
+	// state during rendering) are captured. Without this we would
+	// freeze a stale entry under the post-render version.
+	finalVersion := rawItem.Version()
+
+	frozen := false
+	if rawItem.Finished() {
+		if _, suppressed := l.freezeSuppressed[rawItem]; !suppressed {
+			frozen = true
+		}
+	}
+
+	if entry == nil {
+		entry = &listCacheEntry{}
+		l.cache[rawItem] = entry
 	}
+	entry.width = l.width
+	entry.version = finalVersion
+	entry.frozen = frozen
+	entry.content = rendered
+	entry.lines = lines
+	entry.height = height
+	return entry
+}
+
+// invalidateAll drops every cache entry. Called on width changes.
+func (l *List) invalidateAll() {
+	for k := range l.cache {
+		delete(l.cache, k)
+	}
+}
 
-	return ri
+// Invalidate drops the cache entry for the given item, forcing a
+// re-render on the next getItem call. No-op if the item is not in
+// the cache.
+func (l *List) Invalidate(item Item) {
+	delete(l.cache, item)
+}
+
+// InvalidateFrozen drops the frozen flag (and stored content) for the
+// given item. Equivalent to Invalidate but exposed under the F6
+// frozen-items vocabulary so external callers can express intent.
+func (l *List) InvalidateFrozen(item Item) {
+	delete(l.cache, item)
+}
+
+// retainCacheFor drops every cache entry whose key is not in the given
+// item set. Used by SetItems to keep entries for stable items while
+// dropping entries for removed ones.
+func (l *List) retainCacheFor(items []Item) {
+	if len(l.cache) == 0 {
+		return
+	}
+	keep := make(map[Item]struct{}, len(items))
+	for _, it := range items {
+		keep[it] = struct{}{}
+	}
+	for k := range l.cache {
+		if _, ok := keep[k]; !ok {
+			delete(l.cache, k)
+		}
+	}
+}
+
+// BeginSelectionDrag marks the items in the inclusive [startIdx, endIdx]
+// range as un-freezable for the duration of an active selection drag.
+// Frozen entries inside the range are dropped so the next render
+// reflects live selection-overlay output. The corresponding
+// EndSelectionDrag clears the suppression set and lets items
+// re-freeze on their next render. Indices outside the items slice
+// are clipped silently.
+func (l *List) BeginSelectionDrag(startIdx, endIdx int) {
+	if len(l.items) == 0 {
+		return
+	}
+	if startIdx > endIdx {
+		startIdx, endIdx = endIdx, startIdx
+	}
+	startIdx = max(startIdx, 0)
+	endIdx = min(endIdx, len(l.items)-1)
+	for i := startIdx; i <= endIdx; i++ {
+		it := l.items[i]
+		l.freezeSuppressed[it] = struct{}{}
+		// Drop any cached frozen entry so the next render rebuilds
+		// it as a live (un-frozen) entry that picks up the
+		// selection overlay.
+		if entry, ok := l.cache[it]; ok && entry.frozen {
+			delete(l.cache, it)
+		}
+	}
+}
+
+// EndSelectionDrag clears the selection-drag freeze suppression. Items
+// inside the previous range will re-freeze on their next render once
+// their Finished() reports true again.
+func (l *List) EndSelectionDrag() {
+	for k := range l.freezeSuppressed {
+		delete(l.freezeSuppressed, k)
+		// Drop the cache entry so the next render produces a clean
+		// (un-highlighted) frozen entry.
+		delete(l.cache, k)
+	}
 }
 
 // ScrollToIndex scrolls the list to the given item index.
@@ -276,54 +457,75 @@ func (l *List) VisibleItemIndices() (startIdx, endIdx int) {
 }
 
 // Render renders the list and returns the visible lines.
+//
+// F7: per-item slicing is bounded by the remaining viewport budget so
+// per-frame work is O(viewport) rather than O(total item heights).
+// We never append beyond l.height lines to the output buffer; the
+// final trim is therefore unnecessary. Reverse mode applies the same
+// final reversal as before, which is byte-identical because the
+// pre-F7 trim happened at the tail of the joined buffer (the same
+// lines we now drop implicitly per item).
 func (l *List) Render() string {
 	if len(l.items) == 0 {
 		return ""
 	}
 
-	var lines []string
+	budget := max(l.height, 0)
+	lines := make([]string, 0, budget)
 	currentIdx := l.offsetIdx
 	currentOffset := l.offsetLine
 
-	linesNeeded := l.height
+	for currentIdx < len(l.items) {
+		remaining := budget - len(lines)
+		if remaining <= 0 {
+			break
+		}
 
-	for linesNeeded > 0 && currentIdx < len(l.items) {
-		item := l.getItem(currentIdx)
-		itemLines := strings.Split(item.content, "\n")
+		entry := l.renderItemEntry(currentIdx)
+		if entry == nil {
+			break
+		}
+		itemLines := entry.lines
 		itemHeight := len(itemLines)
 
 		if currentOffset >= 0 && currentOffset < itemHeight {
-			// Add visible content lines
-			lines = append(lines, itemLines[currentOffset:]...)
+			// Append only the visible slice that fits in the
+			// remaining viewport budget. Anything past the
+			// budget would be discarded by the pre-F7 tail
+			// trim, so skipping the append here is
+			// byte-identical and bounded.
+			visible := itemLines[currentOffset:]
+			if len(visible) > remaining {
+				visible = visible[:remaining]
+			}
+			lines = append(lines, visible...)
 
-			// Add gap if this is not the absolute last visual element (conceptually gaps are between items)
-			// But in the loop we can just add it and trim later
+			// Gap rows after the item, capped to the
+			// remaining budget so a 30k-line item with a
+			// trailing gap can't push past the viewport.
 			if l.gap > 0 {
-				for i := 0; i < l.gap; i++ {
+				gapBudget := min(budget-len(lines), l.gap)
+				for range gapBudget {
 					lines = append(lines, "")
 				}
 			}
 		} else {
-			// offsetLine starts in the gap
+			// offsetLine starts inside the gap.
 			gapOffset := currentOffset - itemHeight
 			gapRemaining := l.gap - gapOffset
 			if gapRemaining > 0 {
-				for range gapRemaining {
+				gapBudget := min(budget-len(lines), gapRemaining)
+				for range gapBudget {
 					lines = append(lines, "")
 				}
 			}
 		}
 
-		linesNeeded = l.height - len(lines)
 		currentIdx++
-		currentOffset = 0 // Reset offset for subsequent items
+		currentOffset = 0 // Reset offset for subsequent items.
 	}
 
-	l.height = max(l.height, 0)
-
-	if len(lines) > l.height {
-		lines = lines[:l.height]
-	}
+	l.height = budget
 
 	if l.reverse {
 		// Reverse the lines so the list renders bottom-to-top.
@@ -348,18 +550,15 @@ func (l *List) PrependItems(items ...Item) {
 	}
 }
 
-// SetItems sets the items in the list.
+// SetItems sets the items in the list. Cache entries for items that
+// remain after the swap are preserved; entries for removed items are
+// dropped.
 func (l *List) SetItems(items ...Item) {
-	l.setItems(true, items...)
-}
-
-// setItems sets the items in the list. If evict is true, it clears the
-// rendered item cache.
-func (l *List) setItems(evict bool, items ...Item) {
 	l.items = items
 	l.selectedIdx = min(l.selectedIdx, len(l.items)-1)
 	l.offsetIdx = min(l.offsetIdx, len(l.items)-1)
 	l.offsetLine = 0
+	l.retainCacheFor(items)
 }
 
 // AppendItems appends items to the list.
@@ -373,9 +572,16 @@ func (l *List) RemoveItem(idx int) {
 		return
 	}
 
+	removed := l.items[idx]
+
 	// Remove the item
 	l.items = append(l.items[:idx], l.items[idx+1:]...)
 
+	// Drop the cache entry for the removed item; entries for stable
+	// items stay valid because they are keyed by pointer, not index.
+	delete(l.cache, removed)
+	delete(l.freezeSuppressed, removed)
+
 	// Adjust selection if needed
 	if l.selectedIdx == idx {
 		l.selectedIdx = -1

internal/ui/list/list_test.go 🔗

@@ -0,0 +1,770 @@
+package list
+
+import (
+	"strconv"
+	"strings"
+	"testing"
+
+	"github.com/stretchr/testify/require"
+)
+
+// trackedItem is a test helper that counts Render calls. The body of
+// Render is the item's content concatenated with the call counter so
+// that "served from cache" vs "freshly rendered" is observable from
+// the rendered string itself.
+type trackedItem struct {
+	*Versioned
+	id         string
+	body       string
+	finished   bool
+	renderHits int
+}
+
+func newTrackedItem(id, body string, finished bool) *trackedItem {
+	return &trackedItem{
+		Versioned: NewVersioned(),
+		id:        id,
+		body:      body,
+		finished:  finished,
+	}
+}
+
+func (t *trackedItem) Render(width int) string {
+	t.renderHits++
+	return t.body + ":w=" + strconv.Itoa(width)
+}
+
+func (t *trackedItem) Finished() bool {
+	return t.finished
+}
+
+// TestList_RenderMemo_PointerKey covers the F6 invariant that the
+// list-level cache is keyed by item pointer, not slice index, so
+// PrependItems and AppendItems do not shift cached entries to the
+// wrong item.
+func TestList_RenderMemo_PointerKey(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+
+	// First render populates the cache for every item.
+	first := l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Prepending a new item must not shift the existing entries to
+	// the wrong key. The existing items render exactly once more
+	// only if their cache was lost, which would be a bug. Scroll to
+	// the top so the prepended item is visible and gets rendered.
+	z := newTrackedItem("z", "zulu", false)
+	l.PrependItems(z)
+	l.ScrollToTop()
+	_ = l.Render()
+	require.Equal(t, 1, z.renderHits, "prepended item rendered once")
+	require.Equal(t, 1, a.renderHits, "stable item must keep its cached entry across PrependItems")
+	require.Equal(t, 1, b.renderHits, "stable item must keep its cached entry across PrependItems")
+	require.Equal(t, 1, c.renderHits, "stable item must keep its cached entry across PrependItems")
+
+	// AppendItems is symmetric.
+	d := newTrackedItem("d", "delta", false)
+	l.AppendItems(d)
+	_ = l.Render()
+	require.Equal(t, 1, d.renderHits, "appended item rendered once")
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// The output is non-trivial.
+	require.Contains(t, first, "alpha")
+}
+
+// TestList_SetSize_WidthChangeInvalidates covers the F6 invariant
+// that a width change drops every cached entry but a height-only
+// change leaves the cache intact.
+func TestList_SetSize_WidthChangeInvalidates(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+
+	l := NewList(a, b)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+
+	// Height-only change: no invalidation.
+	l.SetSize(40, 20)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "height-only change must keep cache entries")
+	require.Equal(t, 1, b.renderHits, "height-only change must keep cache entries")
+
+	// Width change: every entry invalidates.
+	l.SetSize(80, 20)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "width change must invalidate cache entries")
+	require.Equal(t, 2, b.renderHits, "width change must invalidate cache entries")
+}
+
+// TestList_RemoveItem_DropsEntry covers the F6 invariant that
+// RemoveItem drops the cache entry for the removed item but leaves
+// the surviving entries in place.
+func TestList_RemoveItem_DropsEntry(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	l.RemoveItem(1) // remove b
+	_ = l.Render()
+	// a and c still cached.
+	require.Equal(t, 1, a.renderHits, "stable item must keep cached entry across RemoveItem")
+	require.Equal(t, 1, c.renderHits, "stable item must keep cached entry across RemoveItem")
+	// The removed item's entry is dropped — verify by re-adding b
+	// and confirming it renders as if fresh.
+	l.AppendItems(b)
+	_ = l.Render()
+	require.Equal(t, 2, b.renderHits, "re-added item must re-render")
+}
+
+// TestList_FrozenItem_NotReRendered covers §4.5.1: items that report
+// Finished() == true on entry creation are marked frozen after the
+// first render and are never re-rendered until width change or
+// version bump.
+func TestList_FrozenItem_NotReRendered(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	b := newTrackedItem("b", "bravo", true)
+
+	l := NewList(a, b)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "frozen items render exactly once on first draw")
+	require.Equal(t, 1, b.renderHits, "frozen items render exactly once on first draw")
+
+	// Many subsequent renders must not re-render frozen items.
+	for range 5 {
+		_ = l.Render()
+	}
+	require.Equal(t, 1, a.renderHits, "frozen items must not re-render across redraws")
+	require.Equal(t, 1, b.renderHits, "frozen items must not re-render across redraws")
+}
+
+// TestList_FrozenItem_TransitionsAfterFinish covers §4.5.1: a
+// streaming item that later reports Finished() == true transitions
+// to frozen on the first render after finish.
+func TestList_FrozenItem_TransitionsAfterFinish(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false) // streaming
+	l := NewList(a)
+	l.SetSize(40, 10)
+
+	// While unfinished, every render rebuilds the cache because the
+	// item's Finished() is false.
+	for range 3 {
+		// Bump the version to simulate a streaming delta.
+		a.Bump()
+		_ = l.Render()
+	}
+	require.Equal(t, 3, a.renderHits)
+
+	// Item finishes; on the next render it freezes.
+	a.finished = true
+	a.Bump()
+	_ = l.Render()
+	require.Equal(t, 4, a.renderHits, "post-finish render still happens once")
+
+	for range 5 {
+		_ = l.Render()
+	}
+	require.Equal(t, 4, a.renderHits, "frozen after finish, no further renders")
+}
+
+// TestList_FrozenItem_VersionBumpUnfreezes covers §4.5.1: a frozen
+// item that gets a version bump (unexpectedly mutated) is unfrozen
+// and re-rendered — no stale output.
+func TestList_FrozenItem_VersionBumpUnfreezes(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	l := NewList(a)
+	l.SetSize(40, 10)
+
+	_ = l.Render()
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+
+	a.Bump()
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "version bump must invalidate frozen entry")
+
+	// Re-renders without bumping go back to cache hits.
+	_ = l.Render()
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "post-bump render re-freezes")
+}
+
+// TestList_FrozenItem_ResizeUnfreezes covers §4.5.1: resize
+// invalidates frozen entries.
+func TestList_FrozenItem_ResizeUnfreezes(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	l := NewList(a)
+	l.SetSize(40, 10)
+
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+
+	l.SetSize(80, 10)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "width change must invalidate frozen entry")
+}
+
+// TestList_FrozenItem_SelectionDragUnfreeze covers §4.5.1: an active
+// selection-drag span must un-freeze items inside the range; ending
+// the drag re-freezes them.
+func TestList_FrozenItem_SelectionDragUnfreeze(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", true)
+	b := newTrackedItem("b", "bravo", true)
+	c := newTrackedItem("c", "charlie", true)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Begin a selection drag spanning items 0..1. Items inside the
+	// range must re-render (they re-render exactly once because
+	// the un-freeze drops the cached entry, and the selection
+	// suppression keeps them un-frozen until the drag ends).
+	l.BeginSelectionDrag(0, 1)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "drag-spanned item must re-render once on entering the drag")
+	require.Equal(t, 2, b.renderHits, "drag-spanned item must re-render once on entering the drag")
+	require.Equal(t, 1, c.renderHits, "out-of-range item must remain frozen")
+
+	// While the drag is active, items inside the range are NOT
+	// frozen. Subsequent renders without state changes still
+	// trigger re-renders (because version+width hit but frozen=false
+	// also matches; we still re-use the cache — no, actually with
+	// our implementation we DO cache unfrozen entries by version).
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "unfrozen but version-stable hits the cache")
+	require.Equal(t, 2, b.renderHits, "unfrozen but version-stable hits the cache")
+
+	// End the drag. Items inside the range re-render once and
+	// re-freeze.
+	l.EndSelectionDrag()
+	_ = l.Render()
+	require.Equal(t, 3, a.renderHits, "post-drag render re-freezes the entry")
+	require.Equal(t, 3, b.renderHits, "post-drag render re-freezes the entry")
+
+	// Subsequent renders are cache hits again.
+	for range 3 {
+		_ = l.Render()
+	}
+	require.Equal(t, 3, a.renderHits, "frozen after drag end")
+	require.Equal(t, 3, b.renderHits, "frozen after drag end")
+}
+
+// TestList_RenderOutputStableAcrossDraws is the F6 byte-equality
+// invariant: rendering the same list multiple times must produce the
+// same bytes.
+func TestList_RenderOutputStableAcrossDraws(t *testing.T) {
+	t.Parallel()
+
+	items := make([]Item, 0, 5)
+	for i := range 5 {
+		items = append(items, newTrackedItem(strconv.Itoa(i), "item-"+strconv.Itoa(i), i%2 == 0))
+	}
+	l := NewList(items...)
+	l.SetSize(40, 20)
+
+	first := l.Render()
+	for range 4 {
+		require.Equal(t, first, l.Render(), "render output must be byte-stable across draws")
+	}
+	// And the output is non-trivial.
+	require.True(t, strings.Contains(first, "item-0"))
+}
+
+// TestList_SetItems_PointerOverlapRetainsCache covers F6 §4.5
+// invalidation semantics for SetItems. When the new slice shares
+// some pointers with the previous slice (a typical "swap a few
+// items, keep the rest" scenario), the cache entries for the
+// surviving items must be retained — re-rendering them would defeat
+// the memo. Entries for the items that were removed must be
+// dropped so they can't serve stale output if the same pointer is
+// re-introduced later.
+func TestList_SetItems_PointerOverlapRetainsCache(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+	d := newTrackedItem("d", "delta", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Replace the slice with one that shares a and c (b is dropped,
+	// d is added). a and c must keep their cache entries; d renders
+	// once on the next draw.
+	l.SetItems(a, c, d)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits, "stable item must keep cached entry across SetItems")
+	require.Equal(t, 1, c.renderHits, "stable item must keep cached entry across SetItems")
+	require.Equal(t, 1, d.renderHits, "new item renders once")
+
+	// Re-introducing b after it was dropped must rebuild its
+	// entry (its previous cache entry was invalidated by SetItems).
+	l.SetItems(a, b, c)
+	_ = l.Render()
+	require.Equal(t, 2, b.renderHits, "re-introduced item must re-render — its old entry was dropped")
+	// a and c remained throughout both swaps.
+	require.Equal(t, 1, a.renderHits, "stable item retained across multiple SetItems")
+	require.Equal(t, 1, c.renderHits, "stable item retained across multiple SetItems")
+}
+
+// TestList_SetItems_AllNewDropsEveryEntry covers F6 §4.5: when the
+// SetItems slice has no pointer overlap with the previous slice,
+// every cache entry from the previous slice is dropped. This is
+// the pure-replace case (e.g. session switch).
+func TestList_SetItems_AllNewDropsEveryEntry(t *testing.T) {
+	t.Parallel()
+
+	a := newTrackedItem("a", "alpha", false)
+	b := newTrackedItem("b", "bravo", false)
+	c := newTrackedItem("c", "charlie", false)
+
+	l := NewList(a, b, c)
+	l.SetSize(40, 10)
+	_ = l.Render()
+	require.Equal(t, 1, a.renderHits)
+	require.Equal(t, 1, b.renderHits)
+	require.Equal(t, 1, c.renderHits)
+
+	// Replace with a fully disjoint slice. Every entry from the
+	// previous slice must be dropped.
+	x := newTrackedItem("x", "xray", false)
+	y := newTrackedItem("y", "yankee", false)
+	l.SetItems(x, y)
+	_ = l.Render()
+	require.Equal(t, 1, x.renderHits, "new item renders once")
+	require.Equal(t, 1, y.renderHits, "new item renders once")
+
+	// Re-introducing the originals must rebuild every entry.
+	l.SetItems(a, b, c)
+	_ = l.Render()
+	require.Equal(t, 2, a.renderHits, "previously-dropped item must re-render")
+	require.Equal(t, 2, b.renderHits, "previously-dropped item must re-render")
+	require.Equal(t, 2, c.renderHits, "previously-dropped item must re-render")
+}
+
+// TestVersioned_BumpMonotonic covers the basic Versioned contract:
+// Version() starts at zero and Bump() advances it monotonically.
+func TestVersioned_BumpMonotonic(t *testing.T) {
+	t.Parallel()
+
+	v := NewVersioned()
+	require.Equal(t, uint64(0), v.Version())
+	v.Bump()
+	require.Equal(t, uint64(1), v.Version())
+	v.Bump()
+	v.Bump()
+	require.Equal(t, uint64(3), v.Version())
+}
+
+// multiLineItem is a test helper whose Render returns a fixed
+// multi-line body. Each line is uniquely identifiable (id:N) so a
+// test can reconstruct the expected visible window by index. F7's
+// byte-identity matrix is built around these.
+type multiLineItem struct {
+	*Versioned
+	id     string
+	height int
+}
+
+func newMultiLineItem(id string, height int) *multiLineItem {
+	return &multiLineItem{
+		Versioned: NewVersioned(),
+		id:        id,
+		height:    height,
+	}
+}
+
+func (m *multiLineItem) Render(_ int) string {
+	if m.height <= 0 {
+		return ""
+	}
+	parts := make([]string, m.height)
+	for i := range m.height {
+		parts[i] = m.id + ":" + strconv.Itoa(i)
+	}
+	return strings.Join(parts, "\n")
+}
+
+func (m *multiLineItem) Finished() bool { return true }
+
+// expectedRender computes what list.Render *should* produce from
+// first principles given the item heights, viewport, offsetIdx,
+// offsetLine, gap, and reverse settings. It mirrors the pre-F7
+// "build everything, trim to height, reverse" semantics so we can
+// assert byte-identity against the new bounded path.
+func expectedRender(items []*multiLineItem, height, offsetIdx, offsetLine, gap int, reverse bool) string {
+	if len(items) == 0 {
+		return ""
+	}
+	budget := max(height, 0)
+	var lines []string
+	currentOffset := offsetLine
+	for idx := offsetIdx; idx < len(items) && len(lines) < budget; idx++ {
+		body := items[idx].Render(0)
+		body = strings.TrimRight(body, "\n")
+		itemLines := strings.Split(body, "\n")
+		itemHeight := len(itemLines)
+
+		if currentOffset >= 0 && currentOffset < itemHeight {
+			lines = append(lines, itemLines[currentOffset:]...)
+			for range gap {
+				lines = append(lines, "")
+			}
+		} else {
+			gapOffset := currentOffset - itemHeight
+			gapRemaining := gap - gapOffset
+			for range max(gapRemaining, 0) {
+				lines = append(lines, "")
+			}
+		}
+		currentOffset = 0
+	}
+	if len(lines) > budget {
+		lines = lines[:budget]
+	}
+	if reverse {
+		for i, j := 0, len(lines)-1; i < j; i, j = i+1, j-1 {
+			lines[i], lines[j] = lines[j], lines[i]
+		}
+	}
+	return strings.Join(lines, "\n")
+}
+
+// TestList_F7_ByteIdentityMatrix is T1 from the F7 plan: a sweep
+// over (item heights × viewport heights × offsets × gaps × reverse)
+// that asserts list.Render produces output byte-identical to a
+// pre-F7-equivalent reference (build full buffer, trim at end).
+func TestList_F7_ByteIdentityMatrix(t *testing.T) {
+	t.Parallel()
+
+	itemHeights := [][]int{
+		{1},
+		{5},
+		{1, 1, 1},
+		{3, 7, 2},
+		{20, 5, 30},
+		{50, 1, 50, 1},
+	}
+	viewportHeights := []int{1, 3, 5, 10, 25, 100}
+	offsetIdxs := []int{0, 1, 2}
+	offsetLines := []int{0, 1, 4}
+	gaps := []int{0, 1, 3}
+	reverses := []bool{false, true}
+
+	for _, heights := range itemHeights {
+		for _, vh := range viewportHeights {
+			for _, oIdx := range offsetIdxs {
+				if oIdx >= len(heights) {
+					continue
+				}
+				for _, oLine := range offsetLines {
+					maxOffset := heights[oIdx]
+					if oLine >= maxOffset {
+						continue
+					}
+					for _, gap := range gaps {
+						for _, reverse := range reverses {
+							items := make([]*multiLineItem, len(heights))
+							asItems := make([]Item, len(heights))
+							for i, h := range heights {
+								items[i] = newMultiLineItem("i"+strconv.Itoa(i), h)
+								asItems[i] = items[i]
+							}
+							l := NewList(asItems...)
+							l.SetSize(40, vh)
+							l.SetGap(gap)
+							l.SetReverse(reverse)
+							l.offsetIdx = oIdx
+							l.offsetLine = oLine
+
+							got := l.Render()
+							want := expectedRender(items, vh, oIdx, oLine, gap, reverse)
+							require.Equalf(t, want, got,
+								"mismatch heights=%v vh=%d oIdx=%d oLine=%d gap=%d reverse=%v",
+								heights, vh, oIdx, oLine, gap, reverse)
+						}
+					}
+				}
+			}
+		}
+	}
+}
+
+// TestList_F7_GiantItemBoundedRender is T2: a single 10,000-line
+// item with a 50-line viewport. Render must return exactly 50
+// lines — no off-by-one, no trim issue. This is the F7 win in
+// test form: per-frame work is bounded by viewport, not item
+// height.
+func TestList_F7_GiantItemBoundedRender(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 10000
+	const viewport = 50
+
+	giant := newMultiLineItem("giant", itemHeight)
+	l := NewList(giant)
+	l.SetSize(40, viewport)
+
+	out := l.Render()
+	got := strings.Count(out, "\n") + 1
+	require.Equal(t, viewport, got, "render output must be exactly viewport lines for an oversized item")
+
+	// And the lines are the prefix of the item starting at
+	// offset 0.
+	lines := strings.Split(out, "\n")
+	for i, line := range lines {
+		require.Equal(t, "giant:"+strconv.Itoa(i), line, "line %d does not match expected slice", i)
+	}
+}
+
+// TestList_F7_GiantItemWithOffsetBoundedRender complements T2 with
+// a non-zero offsetLine so we exercise both the "skip prefix" and
+// "bound suffix" sides of the slice.
+func TestList_F7_GiantItemWithOffsetBoundedRender(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 10000
+	const viewport = 50
+	const offset = 1234
+
+	giant := newMultiLineItem("giant", itemHeight)
+	l := NewList(giant)
+	l.SetSize(40, viewport)
+	l.offsetLine = offset
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, viewport, "render output must be exactly viewport lines for an oversized item")
+	for i, line := range lines {
+		require.Equal(t, "giant:"+strconv.Itoa(offset+i), line, "line %d does not match expected slice", i)
+	}
+}
+
+// TestList_F7_GapOverflow is T3: viewport height 5, two items each
+// 10 lines, gap of 3. Render returns exactly 5 lines and never
+// includes gap rows beyond the viewport.
+func TestList_F7_GapOverflow(t *testing.T) {
+	t.Parallel()
+
+	a := newMultiLineItem("a", 10)
+	b := newMultiLineItem("b", 10)
+	l := NewList(a, b)
+	l.SetSize(40, 5)
+	l.SetGap(3)
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, 5, "viewport must clamp output to height even with gap rows pending")
+
+	// Gap rows after item a would only appear if the viewport
+	// extended past the first 10 lines, which it doesn't here.
+	for i, line := range lines {
+		require.Equal(t, "a:"+strconv.Itoa(i), line, "line %d", i)
+	}
+}
+
+// TestList_F7_GapOverflow_BoundaryStraddle exercises a viewport
+// that lands inside the gap region between two items: 12 lines
+// viewport, item a height 10, item b height 10, gap 3 — first 10
+// lines from a, then 2 of the 3 gap rows, no b lines yet.
+func TestList_F7_GapOverflow_BoundaryStraddle(t *testing.T) {
+	t.Parallel()
+
+	a := newMultiLineItem("a", 10)
+	b := newMultiLineItem("b", 10)
+	l := NewList(a, b)
+	l.SetSize(40, 12)
+	l.SetGap(3)
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, 12)
+
+	for i := range 10 {
+		require.Equal(t, "a:"+strconv.Itoa(i), lines[i])
+	}
+	require.Equal(t, "", lines[10])
+	require.Equal(t, "", lines[11])
+}
+
+// TestList_F7_ReverseGiantItem is T4: same bounded-slicing
+// invariant in reverse mode. Reverse mode keeps the same final
+// trim semantics; bounded slicing must produce the same window
+// (just reversed).
+func TestList_F7_ReverseGiantItem(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 10000
+	const viewport = 50
+
+	giant := newMultiLineItem("giant", itemHeight)
+	l := NewList(giant)
+	l.SetSize(40, viewport)
+	l.SetReverse(true)
+
+	out := l.Render()
+	lines := strings.Split(out, "\n")
+	require.Len(t, lines, viewport)
+
+	// Expected: the same first-50 slice as the non-reverse path
+	// but reversed.
+	for i, line := range lines {
+		expectedIdx := viewport - 1 - i
+		require.Equal(t, "giant:"+strconv.Itoa(expectedIdx), line, "reverse line %d", i)
+	}
+}
+
+// TestList_F7_OffsetLineAtItemBoundary is T5: offsetLine ==
+// itemHeight lands exactly past the last visible line of the item
+// at offsetIdx. The renderer must not address line N (which does
+// not exist); the visible window starts at the gap rows (when gap
+// > 0) or at the next item (when gap == 0).
+func TestList_F7_OffsetLineAtItemBoundary(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name     string
+		gap      int
+		viewport int
+		want     []string
+	}{
+		{
+			name:     "gap zero jumps straight to next item",
+			gap:      0,
+			viewport: 5,
+			want: []string{
+				"b:0", "b:1", "b:2", "b:3", "b:4",
+			},
+		},
+		{
+			name:     "gap two emits gap rows then next item",
+			gap:      2,
+			viewport: 5,
+			want: []string{
+				"", "", "b:0", "b:1", "b:2",
+			},
+		},
+	}
+
+	const itemHeight = 4
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			a := newMultiLineItem("a", itemHeight)
+			b := newMultiLineItem("b", 10)
+			l := NewList(a, b)
+			l.SetSize(40, tc.viewport)
+			l.SetGap(tc.gap)
+			l.offsetIdx = 0
+			l.offsetLine = itemHeight // exactly at the boundary
+
+			out := l.Render()
+			require.Equal(t, strings.Join(tc.want, "\n"), out)
+		})
+	}
+}
+
+// TestList_F7_OffsetLineInsideGap is T6: offsetLine is one row
+// past the end of the item at offsetIdx, landing inside the gap
+// region. The visible window starts at the second gap row and
+// then continues into the next item.
+func TestList_F7_OffsetLineInsideGap(t *testing.T) {
+	t.Parallel()
+
+	const itemHeight = 4
+	const gap = 3
+	const viewport = 5
+
+	a := newMultiLineItem("a", itemHeight)
+	b := newMultiLineItem("b", 10)
+	l := NewList(a, b)
+	l.SetSize(40, viewport)
+	l.SetGap(gap)
+	l.offsetIdx = 0
+	l.offsetLine = itemHeight + 1 // one row into the gap
+
+	out := l.Render()
+	want := strings.Join([]string{
+		"",    // second gap row
+		"",    // third gap row
+		"b:0", // next item starts
+		"b:1",
+		"b:2",
+	}, "\n")
+	require.Equal(t, want, out)
+}
+
+// TestList_F7_ViewportZeroOrNegative is T7: a non-positive
+// viewport height must produce an empty string with no panic and
+// must normalize l.height to zero (the budget := max(l.height, 0)
+// side effect).
+func TestList_F7_ViewportZeroOrNegative(t *testing.T) {
+	t.Parallel()
+
+	tests := []struct {
+		name   string
+		height int
+	}{
+		{name: "height zero", height: 0},
+		{name: "height negative", height: -1},
+	}
+
+	for _, tc := range tests {
+		t.Run(tc.name, func(t *testing.T) {
+			t.Parallel()
+
+			a := newMultiLineItem("a", 5)
+			l := NewList(a)
+			l.SetSize(40, tc.height)
+
+			out := l.Render()
+			require.Equal(t, "", out, "render must be empty for non-positive viewport")
+			require.Equal(t, 0, l.height, "render must normalize height to zero")
+		})
+	}
+}

internal/ui/model/chat.go 🔗

@@ -63,6 +63,32 @@ type Chat struct {
 	// follow is a flag to indicate whether the view should auto-scroll to
 	// bottom on new messages.
 	follow bool
+
+	// drawCache memoizes the decoded form of the last list.Render output so
+	// repeat frames with byte-identical content skip the per-cell ANSI
+	// reparse that uv.StyledString.Draw performs every call. See F9
+	// (docs/notes/2026-05-12-chat-rendering-perf.md §4.8). Bounded to one
+	// entry; invalidated implicitly by string inequality on the next Draw.
+	drawCache *chatDrawCache
+}
+
+// chatDrawCache holds the pre-decoded form of the last list.Render output.
+// The cache is keyed by the rendered string and the screen's width method
+// (graphemes vs wcwidth pick different decoders inside ultraviolet's
+// printString, so a cached buffer is only valid for the method it was
+// decoded with). The cached buffer is independent of the draw area, so
+// resize / scroll changes that produce the same string still hit. We cannot
+// use uv.StyledString.Lines because it bottoms out at the first iteration
+// against a zero-bounds rectangle (see ultraviolet styled.go line 45 — the
+// shared printString loop's `y >= bounds.Max.Y` exit applies to the
+// line-building branch too). A Buffer of the rendered text's natural
+// dimensions is the cheapest correct shape: StyledString.Draw runs once on
+// miss to populate it, and Buffer.Draw is an O(cells) cell copy with no
+// ANSI re-parse on hit.
+type chatDrawCache struct {
+	rendered string
+	method   ansi.Method
+	buf      uv.ScreenBuffer
 }
 
 // NewChat creates a new instance of [Chat] that handles chat interactions and
@@ -89,8 +115,93 @@ func (m *Chat) Height() int {
 }
 
 // Draw renders the chat UI component to the screen and the given area.
+//
+// The list's rendered output is cached in decoded form (see chatDrawCache) so
+// that frames with byte-identical content skip the ANSI reparse that
+// uv.StyledString.Draw performs on every call. The cache is keyed by the
+// rendered string and the screen's width method; area / scroll changes do not
+// invalidate it.
 func (m *Chat) Draw(scr uv.Screen, area uv.Rectangle) {
-	uv.NewStyledString(m.list.Render()).Draw(scr, area)
+	rendered := m.list.Render()
+	method, ok := scr.WidthMethod().(ansi.Method)
+	if !ok {
+		// Width method isn't an ansi.Method (unlikely in practice — both
+		// TerminalScreen and ScreenBuffer store ansi.Method). Fall back
+		// to the uncached path so behavior matches upstream exactly.
+		uv.NewStyledString(rendered).Draw(scr, area)
+		return
+	}
+	if m.drawCache == nil ||
+		m.drawCache.rendered != rendered ||
+		m.drawCache.method != method {
+		m.drawCache = newChatDrawCache(rendered, method)
+	}
+	drawCachedBuffer(scr, area, m.drawCache.buf)
+}
+
+// newChatDrawCache builds a chatDrawCache for the given rendered string by
+// running uv.StyledString.Draw into a fresh buffer sized to the text's
+// natural bounds under the active width method. This is the only place
+// ANSI decoding happens for cached frames — subsequent draws reuse buf
+// via drawCachedBuffer.
+//
+// We can't use uv.StyledString.Bounds() here: it is hard-coded to
+// ansi.GraphemeWidth, while StyledString.Draw lays cells using the
+// destination buffer's WidthMethod (which we capture in `method`). For
+// strings where graphemes and wcwidth disagree (emoji ZWJ sequences,
+// some CJK, certain combining marks) the two answers diverge, leaving
+// the cached buffer either too small (trailing cells dropped on hit) or
+// too large (dead cells past the live content). Computing dimensions
+// with `method.StringWidth` per line matches what printString tallies
+// cell-by-cell, since both decode ANSI sequences and use the same width
+// method.
+func newChatDrawCache(rendered string, method ansi.Method) *chatDrawCache {
+	w, h := renderedBounds(rendered, method)
+	if w <= 0 {
+		w = 1
+	}
+	if h <= 0 {
+		h = 1
+	}
+	buf := uv.NewScreenBuffer(w, h)
+	buf.Method = method
+	uv.NewStyledString(rendered).Draw(buf, buf.Bounds())
+	return &chatDrawCache{
+		rendered: rendered,
+		method:   method,
+		buf:      buf,
+	}
+}
+
+// renderedBounds returns the (width, height) cell extent of rendered
+// when laid out by method. Width is the widest line's StringWidth (which
+// strips ANSI sequences and tallies cells via method, exactly like
+// printString); height is the line count. Both match what
+// uv.StyledString.Draw will write into a buffer whose WidthMethod is
+// method, so the cache buffer is always sized to fit the live content.
+func renderedBounds(rendered string, method ansi.Method) (w, h int) {
+	for line := range strings.SplitSeq(rendered, "\n") {
+		w = max(w, method.StringWidth(line))
+		h++
+	}
+	return w, h
+}
+
+// drawCachedBuffer blits a previously-decoded buffer into scr at area,
+// mirroring uv.StyledString.Draw's screen-mode behavior for the default
+// Wrap=false, Tail="" case. The clear loop matches StyledString.Draw line
+// 51-56; the buf.Draw call replaces the per-cell ANSI decode that
+// printString does on every uncached frame with a pure cell copy.
+func drawCachedBuffer(scr uv.Screen, area uv.Rectangle, buf uv.ScreenBuffer) {
+	// Clear the area first to match StyledString.Draw — leftover cells
+	// from a previous frame outside the new content must be zeroed,
+	// because Buffer.Draw skips empty cells (it doesn't clear).
+	for y := area.Min.Y; y < area.Max.Y; y++ {
+		for x := area.Min.X; x < area.Max.X; x++ {
+			scr.SetCell(x, y, nil)
+		}
+	}
+	buf.Draw(scr, area)
 }
 
 // SetSize sets the size of the chat view port.

internal/ui/model/chat_draw_cache_test.go 🔗

@@ -0,0 +1,334 @@
+package model
+
+import (
+	"strconv"
+	"testing"
+
+	"github.com/charmbracelet/crush/internal/ui/chat"
+	uv "github.com/charmbracelet/ultraviolet"
+	"github.com/charmbracelet/x/ansi"
+	"github.com/stretchr/testify/require"
+)
+
+// drawTestArea is a fixed area used by the cache tests; it must be at least
+// as large as the test items so we can compare every cell.
+func drawTestArea(width, height int) uv.Rectangle {
+	return uv.Rect(0, 0, width, height)
+}
+
+// renderToBuffer mirrors what Chat.Draw does at the screen layer so tests
+// can assert byte-equivalence with a fresh uv.NewStyledString render.
+func renderToBuffer(t *testing.T, c *Chat, w, h int) string {
+	t.Helper()
+	scr := uv.NewScreenBuffer(w, h)
+	c.Draw(scr, drawTestArea(w, h))
+	return scr.Render()
+}
+
+// TestChatDrawCache_HitOnIdenticalRender asserts that two consecutive Draws
+// against the same list output reuse the same chatDrawCache rather than
+// allocating a fresh one. Both the wrapper pointer and the embedded
+// RenderBuffer pointer must be stable across the second Draw — that's what
+// proves no re-decoding happened.
+func TestChatDrawCache_HitOnIdenticalRender(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+		testMessageItem{id: "b", text: "beta"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 20
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.NotNil(t, u.chat.drawCache, "first draw should populate cache")
+	firstCache := u.chat.drawCache
+	firstBuf := u.chat.drawCache.buf.RenderBuffer
+
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.Same(t, firstCache, u.chat.drawCache,
+		"identical rendered string must reuse the same cache pointer")
+	require.Same(t, firstBuf, u.chat.drawCache.buf.RenderBuffer,
+		"identical rendered string must reuse the same RenderBuffer pointer")
+}
+
+// TestChatDrawCache_MissOnDifferentRender asserts that when the list output
+// changes, the cache is rebuilt and the new draw output matches a fresh
+// uv.NewStyledString render byte-for-byte.
+func TestChatDrawCache_MissOnDifferentRender(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 20
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.NotNil(t, u.chat.drawCache)
+	firstCache := u.chat.drawCache
+
+	// Replace the items so the rendered string differs.
+	u.chat.SetMessages(
+		testMessageItem{id: "c", text: "gamma delta"},
+	)
+	u.updateLayoutAndSize()
+
+	got := renderToBuffer(t, u.chat, w, h)
+	require.NotSame(t, firstCache, u.chat.drawCache,
+		"changed rendered string must replace the cache entry")
+
+	// Output must match a fresh uv.NewStyledString render of the current
+	// list output for the same area. This is the byte-equivalence guard
+	// that protects against blit drift from StyledString.Draw.
+	want := freshStyledRender(u.chat.list.Render(), w, h)
+	require.Equal(t, want, got)
+}
+
+// TestChatDrawCache_ReusedAcrossDifferentArea asserts that the cached
+// decoded lines do not depend on the screen / area: the same string drawn
+// into a smaller area must hit the cache, and the output must match a fresh
+// uv.NewStyledString render against the new area.
+func TestChatDrawCache_ReusedAcrossDifferentArea(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+		testMessageItem{id: "b", text: "beta"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 20
+	_ = renderToBuffer(t, u.chat, w, h)
+	require.NotNil(t, u.chat.drawCache)
+	firstCache := u.chat.drawCache
+	firstBuf := u.chat.drawCache.buf.RenderBuffer
+
+	// Draw the same content into a smaller buffer. The list output is
+	// width-sensitive in production (list.Render depends on width), so we
+	// hold the list width fixed via updateLayoutAndSize and only shrink
+	// the destination area. The cache key is the rendered string + width
+	// method, both unchanged here.
+	smallScr := uv.NewScreenBuffer(40, 10)
+	u.chat.Draw(smallScr, uv.Rect(0, 0, 40, 10))
+
+	require.Same(t, firstCache, u.chat.drawCache,
+		"smaller area must still hit the cache")
+	require.Same(t, firstBuf, u.chat.drawCache.buf.RenderBuffer,
+		"cached RenderBuffer must be reused across area changes")
+
+	want := freshStyledRender(u.chat.list.Render(), 40, 10)
+	require.Equal(t, want, smallScr.Render())
+}
+
+// TestChatDrawCache_BoundedSize asserts the draw cache holds at most one
+// entry. This is structural — any future change that introduces an LRU or
+// ring would have to update this test.
+func TestChatDrawCache_BoundedSize(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	w, h := 80, 20
+
+	// Cycle through several distinct list outputs and confirm that
+	// drawCache is still a single *chatDrawCache pointing at the most
+	// recent rendered string each time.
+	for i := range 5 {
+		u.chat.SetMessages(
+			testMessageItem{id: "x", text: "tick " + strconv.Itoa(i)},
+		)
+		u.updateLayoutAndSize()
+		_ = renderToBuffer(t, u.chat, w, h)
+		require.NotNil(t, u.chat.drawCache)
+		require.Equal(t, u.chat.list.Render(), u.chat.drawCache.rendered,
+			"cache.rendered must always match the most recent list output")
+	}
+
+	// Sanity: the type of drawCache is a single pointer, not a slice/map.
+	// This is enforced at compile time but checking it explicitly here
+	// keeps the bounded-size invariant visible to future readers.
+	require.IsType(t, (*chatDrawCache)(nil), u.chat.drawCache)
+}
+
+// freshStyledRender renders the same string through uv.NewStyledString into
+// a fresh ScreenBuffer of the given size. It's the reference implementation
+// used by the cache tests to detect blit drift.
+func freshStyledRender(s string, w, h int) string {
+	scr := uv.NewScreenBuffer(w, h)
+	uv.NewStyledString(s).Draw(scr, uv.Rect(0, 0, w, h))
+	return scr.Render()
+}
+
+// TestChatDrawCache_InvalidatedByWidthMethodSwap asserts the cache rebuilds
+// when the destination screen's width method changes between frames.
+// GraphemeWidth and WcWidth disagree on emoji ZWJ sequences and other
+// modern unicode, so the decoded buffer is only valid for the method it
+// was decoded with. Reusing across methods would corrupt cell widths.
+func TestChatDrawCache_InvalidatedByWidthMethodSwap(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	// Use content where the two methods disagree so any accidental
+	// reuse would surface as a different rendered cell layout. The
+	// woman-technologist ZWJ sequence is one cell under GraphemeWidth
+	// but two under WcWidth (the components combine).
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "hi 👩\u200d💻 there"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 80, 5
+
+	// First draw under GraphemeWidth.
+	scrA := uv.ScreenBuffer{
+		RenderBuffer: uv.NewRenderBuffer(w, h),
+		Method:       ansi.GraphemeWidth,
+	}
+	u.chat.Draw(scrA, uv.Rect(0, 0, w, h))
+	require.NotNil(t, u.chat.drawCache)
+	require.Equal(t, ansi.GraphemeWidth, u.chat.drawCache.method)
+	firstCache := u.chat.drawCache
+
+	// Same string, swap the method. The cache key includes method, so
+	// this must rebuild even though `rendered` is byte-identical.
+	scrB := uv.ScreenBuffer{
+		RenderBuffer: uv.NewRenderBuffer(w, h),
+		Method:       ansi.WcWidth,
+	}
+	u.chat.Draw(scrB, uv.Rect(0, 0, w, h))
+	require.NotSame(t, firstCache, u.chat.drawCache,
+		"width method change must invalidate the cache")
+	require.Equal(t, ansi.WcWidth, u.chat.drawCache.method)
+}
+
+// fixedMethodScreen wraps a uv.ScreenBuffer but reports a custom (non
+// ansi.Method) WidthMethod implementation. Chat.Draw is expected to
+// detect the type-assertion miss and fall through to the uncached path.
+type fixedMethodScreen struct {
+	uv.ScreenBuffer
+	method uv.WidthMethod
+}
+
+func (s fixedMethodScreen) WidthMethod() uv.WidthMethod { return s.method }
+
+// customWidth is a WidthMethod whose concrete type is intentionally NOT
+// ansi.Method, so the type assertion in Chat.Draw fails. The actual
+// width math just delegates to ansi.GraphemeWidth so the rendered
+// output is well-defined and comparable.
+type customWidth struct{}
+
+func (customWidth) StringWidth(s string) int {
+	return ansi.GraphemeWidth.StringWidth(s)
+}
+
+// TestChatDrawCache_FallbackOnNonAnsiMethod asserts that when the screen
+// reports a WidthMethod whose concrete type is not ansi.Method, Chat.Draw
+// skips the cache (it has no comparable key for an arbitrary interface)
+// and falls through to a direct uv.NewStyledString.Draw, producing the
+// same output as the upstream uncached path.
+func TestChatDrawCache_FallbackOnNonAnsiMethod(t *testing.T) {
+	t.Parallel()
+
+	u := newTestUI()
+	u.chat.SetMessages(
+		testMessageItem{id: "a", text: "alpha"},
+		testMessageItem{id: "b", text: "beta"},
+	)
+	u.updateLayoutAndSize()
+
+	w, h := 40, 10
+	scr := fixedMethodScreen{
+		ScreenBuffer: uv.ScreenBuffer{
+			RenderBuffer: uv.NewRenderBuffer(w, h),
+			Method:       ansi.GraphemeWidth,
+		},
+		method: customWidth{},
+	}
+	// Sanity: the wrapper actually returns a non-ansi.Method type so
+	// the fallback path is exercised end-to-end.
+	_, isAnsiMethod := scr.WidthMethod().(ansi.Method)
+	require.False(t, isAnsiMethod,
+		"test setup must hand Draw a non-ansi.Method WidthMethod")
+
+	u.chat.Draw(scr, uv.Rect(0, 0, w, h))
+
+	// The fallback path uses uv.NewStyledString(rendered).Draw — same
+	// thing freshStyledRender does — so output must match the
+	// reference render byte-for-byte.
+	want := freshStyledRender(u.chat.list.Render(), w, h)
+	require.Equal(t, want, scr.Render())
+
+	// And no cache was populated (or if it was, it wasn't used for
+	// this draw). Either way: drawing through the fallback must never
+	// leave a stale cache that future draws would reuse incorrectly.
+	require.Nil(t, u.chat.drawCache,
+		"fallback path must not populate the cache")
+}
+
+// TestRenderedBounds_MatchesPrintStringTallyForZWJ pins the invariant
+// that renderedBounds (used to size the cache buffer) returns the same
+// width as the cell tally StyledString.Draw will write into a buffer
+// using the same WidthMethod. We compare the computed width to a
+// reference render: lay the string into an oversized buffer, then count
+// non-empty cells per row. A divergent-width sample (woman-technologist
+// ZWJ sequence) is the canonical case that would expose a Bounds()
+// vs printString mismatch — under GraphemeWidth the sequence is one
+// cell, under WcWidth it's two — so the same string must produce two
+// different widths under the two methods, both matching the tally.
+func TestRenderedBounds_MatchesPrintStringTallyForZWJ(t *testing.T) {
+	t.Parallel()
+
+	const sample = "x 👩\u200d💻 y"
+
+	for _, m := range []ansi.Method{ansi.GraphemeWidth, ansi.WcWidth} {
+		w, h := renderedBounds(sample, m)
+		// Tally what printString would actually write.
+		// Use an oversized buffer so nothing gets clipped.
+		scr := uv.ScreenBuffer{
+			RenderBuffer: uv.NewRenderBuffer(64, 4),
+			Method:       m,
+		}
+		uv.NewStyledString(sample).Draw(scr, uv.Rect(0, 0, 64, 4))
+
+		// Tally width: rightmost non-empty cell + its width, per row.
+		gotW := 0
+		gotH := 0
+		for y := 0; y < 4; y++ {
+			rowW := 0
+			rowHasContent := false
+			for x := 0; x < 64; x++ {
+				cell := scr.CellAt(x, y)
+				if cell == nil || cell.IsZero() ||
+					cell.Content == " " && cell.Width == 1 &&
+						cell.Style == (uv.Style{}) {
+					continue
+				}
+				rowHasContent = true
+				if x+cell.Width > rowW {
+					rowW = x + cell.Width
+				}
+			}
+			if rowHasContent {
+				gotH = y + 1
+				if rowW > gotW {
+					gotW = rowW
+				}
+			}
+		}
+
+		require.Equal(t, gotW, w,
+			"renderedBounds width must match printString cell tally for method %v", m)
+		// Height is just '\n' count + 1 — no special-casing needed
+		// here, but pin it so future regressions on multi-line
+		// inputs don't sneak by.
+		require.Equal(t, gotH, h,
+			"renderedBounds height must match printString row tally for method %v", m)
+	}
+}
+
+// Compile-time guard: testMessageItem is reused from layout_test.go and
+// satisfies chat.MessageItem there. We rely on the same shape here.
+var _ chat.MessageItem = testMessageItem{}

internal/ui/model/chat_expand_test.go 🔗

@@ -20,7 +20,13 @@ func TestChatToggleExpandedSelectedItem_AssistantMessage(t *testing.T) {
 
 	u := newTestUI()
 
-	msg := &message.Message{ID: "m-assist", Role: message.Assistant}
+	msg := &message.Message{
+		ID:   "m-assist",
+		Role: message.Assistant,
+		Parts: []message.ContentPart{
+			message.ReasoningContent{Thinking: "thinking about it"},
+		},
+	}
 	item := chat.NewAssistantMessageItem(u.com.Styles, msg)
 
 	// The keyboard expand path uses the generic Expandable interface;

internal/ui/model/layout_test.go 🔗

@@ -20,6 +20,8 @@ type testMessageItem struct {
 func (m testMessageItem) ID() string           { return m.id }
 func (m testMessageItem) Render(int) string    { return m.text }
 func (m testMessageItem) RawRender(int) string { return m.text }
+func (m testMessageItem) Version() uint64      { return 0 }
+func (m testMessageItem) Finished() bool       { return true }
 
 var _ chat.MessageItem = testMessageItem{}

internal/workspace/app_workspace.go 🔗

@@ -70,6 +70,12 @@ func (w *AppWorkspace) ParseAgentToolSessionID(sessionID string) (string, string
 // -- Messages --
 
 func (w *AppWorkspace) ListMessages(ctx context.Context, sessionID string) ([]message.Message, error) {
+	// Drain any debounced updates so the caller observes the latest
+	// in-memory state. message.Service buffers streaming deltas and a
+	// cold List would otherwise miss them at session-switch time.
+	if err := w.app.Messages.FlushAll(ctx); err != nil {
+		return nil, err
+	}
 	return w.app.Messages.List(ctx, sessionID)
 }